locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <errno.h>
  24 #include <error.h>
  25 #include <stdlib.h>
  26 #include <wchar.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
  40 #define obstack_chunk_alloc malloc
  41 #define obstack_chunk_free free
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 __attribute ((always_inline))
  55 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  56 {
  57   if (sizeof (int32_t) == sizeof (int))
  58     obstack_int_grow_fast (obstack, data);
  59   else
  60     obstack_grow (obstack, &data, sizeof (int32_t));
  61 }
  62
  63 /* Forward declaration.  */
  64 struct element_t;
  65
  66 /* Data type for list of strings.  */
  67 struct section_list
  68 {
  69   /* Successor in the known_sections list.  */
  70   struct section_list *def_next;
  71   /* Successor in the sections list.  */
  72   struct section_list *next;
  73   /* Name of the section.  */
  74   const char *name;
  75   /* First element of this section.  */
  76   struct element_t *first;
  77   /* Last element of this section.  */
  78   struct element_t *last;
  79   /* These are the rules for this section.  */
  80   enum coll_sort_rule *rules;
  81   /* Index of the rule set in the appropriate section of the output file.  */
  82   int ruleidx;
  83 };
  84
  85 struct element_t;
  86
  87 struct element_list_t
  88 {
  89   /* Number of elements.  */
  90   int cnt;
  91
  92   struct element_t **w;
  93 };
  94
  95 /* Data type for collating element.  */
  96 struct element_t
  97 {
  98   const char *name;
  99
 100   const char *mbs;
 101   size_t nmbs;
 102   const uint32_t *wcs;
 103   size_t nwcs;
 104   int *mborder;
 105   int wcorder;
 106
 107   /* The following is a bit mask which bits are set if this element is
 108      used in the appropriate level.  Interesting for the singlebyte
 109      weight computation.
 110
 111      XXX The type here restricts the number of levels to 32.  It could
 112      be changed if necessary but I doubt this is necessary.  */
 113   unsigned int used_in_level;
 114
 115   struct element_list_t *weights;
 116
 117   /* Nonzero if this is a real character definition.  */
 118   int is_character;
 119
 120   /* Order of the character in the sequence.  This information will
 121      be used in range expressions.  */
 122   int mbseqorder;
 123   int wcseqorder;
 124
 125   /* Where does the definition come from.  */
 126   const char *file;
 127   size_t line;
 128
 129   /* Which section does this belong to.  */
 130   struct section_list *section;
 131
 132   /* Predecessor and successor in the order list.  */
 133   struct element_t *last;
 134   struct element_t *next;
 135
 136   /* Next element in multibyte output list.  */
 137   struct element_t *mbnext;
 138   struct element_t *mblast;
 139
 140   /* Next element in wide character output list.  */
 141   struct element_t *wcnext;
 142   struct element_t *wclast;
 143 };
 144
/* Special element values.  These are small integers cast to element
   pointers, so they can be stored wherever a `struct element_t *' is
   expected but must never be dereferenced.  insert_weights stores
   ELEMENT_ELLIPSIS2 as the placeholder weight of a level whose real
   weight depends on the element iterating over an ellipsis range.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 149
 150 /* Data type for collating symbol.  */
 151 struct symbol_t
 152 {
 153   const char *name;
 154
 155   /* Point to place in the order list.  */
 156   struct element_t *order;
 157
 158   /* Where does the definition come from.  */
 159   const char *file;
 160   size_t line;
 161 };
 162
/* Sparse table of struct element_t *.  The macros defined in front of
   each inclusion parameterize "3level.h": TABLE names the table type,
   ELEMENT is the type of the stored values and DEFAULT the value
   reported for unset entries.  `struct wchead_table' is the type of the
   `wcheads' member of struct locale_collate_t.
   NOTE(review): since TABLE, ELEMENT and DEFAULT are redefined below
   without an #undef, 3level.h presumably undefines its parameters
   itself -- confirm against that header.  The exact effect of ITERATE
   and NO_FINALIZE is also defined there.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_FINALIZE
#include "3level.h"

/* Sparse table of int32_t, with 0 as the default value.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t, with all bits set as the default value.
   `struct collseq_table' is the type of the `wcseqorder' member of
   struct locale_collate_t.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 182
 183
 184 /* Simple name list for the preprocessor.  */
 185 struct name_list
 186 {
 187   struct name_list *next;
 188   char str[0];
 189 };
 190
 191
 192 /* The real definition of the struct for the LC_COLLATE locale.  */
 193 struct locale_collate_t
 194 {
 195   int col_weight_max;
 196   int cur_weight_max;
 197
 198   /* List of known scripts.  */
 199   struct section_list *known_sections;
 200   /* List of used sections.  */
 201   struct section_list *sections;
 202   /* Current section using definition.  */
 203   struct section_list *current_section;
 204   /* There always can be an unnamed section.  */
 205   struct section_list unnamed_section;
 206   /* To make handling of errors easier we have another section.  */
 207   struct section_list error_section;
 208   /* Sometimes we are defining the values for collating symbols before
 209      the first actual section.  */
 210   struct section_list symbol_section;
 211
 212   /* Start of the order list.  */
 213   struct element_t *start;
 214
 215   /* The undefined element.  */
 216   struct element_t undefined;
 217
 218   /* This is the cursor for `reorder_after' insertions.  */
 219   struct element_t *cursor;
 220
 221   /* This value is used when handling ellipsis.  */
 222   struct element_t ellipsis_weight;
 223
 224   /* Known collating elements.  */
 225   hash_table elem_table;
 226
 227   /* Known collating symbols.  */
 228   hash_table sym_table;
 229
 230   /* Known collation sequences.  */
 231   hash_table seq_table;
 232
 233   struct obstack mempool;
 234
 235   /* The LC_COLLATE category is a bit special as it is sometimes possible
 236      that the definitions from more than one input file contains information.
 237      Therefore we keep all relevant input in a list.  */
 238   struct locale_collate_t *next;
 239
 240   /* Arrays with heads of the list for each of the leading bytes in
 241      the multibyte sequences.  */
 242   struct element_t *mbheads[256];
 243
 244   /* Arrays with heads of the list for each of the leading bytes in
 245      the multibyte sequences.  */
 246   struct wchead_table wcheads;
 247
 248   /* The arrays with the collation sequence order.  */
 249   unsigned char mbseqorder[256];
 250   struct collseq_table wcseqorder;
 251
 252   /* State of the preprocessor.  */
 253   enum
 254     {
 255       else_none = 0,
 256       else_ignore,
 257       else_seen
 258     }
 259     else_action;
 260 };
 261
 262
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules, i.e. of weight levels per element.  While
   it is still zero read_directions accepts any number of rules and
   then stores the count here; later direction specifications are
   checked against it, and insert_weights allocates this many weight
   lists for every element.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
 269
 270
 271 /* We need UTF-8 encoding of numbers.  */
 272 static inline int
 273 __attribute ((always_inline))
 274 utf8_encode (char *buf, int val)
 275 {
 276   int retval;
 277
 278   if (val < 0x80)
 279     {
 280       *buf++ = (char) val;
 281       retval = 1;
 282     }
 283   else
 284     {
 285       int step;
 286
 287       for (step = 2; step < 6; ++step)
 288         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 289           break;
 290       retval = step;
 291
 292       *buf = (unsigned char) (~0xff >> step);
 293       --step;
 294       do
 295         {
 296           buf[step] = 0x80 | (val & 0x3f);
 297           val >>= 6;
 298         }
 299       while (--step > 0);
 300       *buf |= val;
 301     }
 302
 303   return retval;
 304 }
 305
 306
 307 static struct section_list *
 308 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 309                    struct section_list *next)
 310 {
 311   struct section_list *newp;
 312
 313   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 314                                                 sizeof (*newp));
 315   newp->next = next;
 316   newp->name = string;
 317   newp->first = NULL;
 318   newp->last = NULL;
 319
 320   return newp;
 321 }
 322
 323
 324 static struct element_t *
 325 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 326              const uint32_t *wcs, const char *name, size_t namelen,
 327              int is_character)
 328 {
 329   struct element_t *newp;
 330
 331   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 332                                              sizeof (*newp));
 333   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 334                                                     name, namelen);
 335   if (mbs != NULL)
 336     {
 337       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 338       newp->nmbs = mbslen;
 339     }
 340   else
 341     {
 342       newp->mbs = NULL;
 343       newp->nmbs = 0;
 344     }
 345   if (wcs != NULL)
 346     {
 347       size_t nwcs = wcslen ((wchar_t *) wcs);
 348       uint32_t zero = 0;
 349       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 350       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 351       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 352       newp->nwcs = nwcs;
 353     }
 354   else
 355     {
 356       newp->wcs = NULL;
 357       newp->nwcs = 0;
 358     }
 359   newp->mborder = NULL;
 360   newp->wcorder = 0;
 361   newp->used_in_level = 0;
 362   newp->is_character = is_character;
 363
 364   /* Will be assigned later.  XXX  */
 365   newp->mbseqorder = 0;
 366   newp->wcseqorder = 0;
 367
 368   /* Will be allocated later.  */
 369   newp->weights = NULL;
 370
 371   newp->file = NULL;
 372   newp->line = 0;
 373
 374   newp->section = collate->current_section;
 375
 376   newp->last = NULL;
 377   newp->next = NULL;
 378
 379   newp->mbnext = NULL;
 380   newp->mblast = NULL;
 381
 382   newp->wcnext = NULL;
 383   newp->wclast = NULL;
 384
 385   return newp;
 386 }
 387
 388
 389 static struct symbol_t *
 390 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 391 {
 392   struct symbol_t *newp;
 393
 394   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 395
 396   newp->name = obstack_copy0 (&collate->mempool, name, len);
 397   newp->order = NULL;
 398
 399   newp->file = NULL;
 400   newp->line = 0;
 401
 402   return newp;
 403 }
 404
 405
 406 /* Test whether this name is already defined somewhere.  */
 407 static int
 408 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 409                  const struct charmap_t *charmap,
 410                  struct repertoire_t *repertoire, const char *symbol,
 411                  size_t symbol_len)
 412 {
 413   void *ignore = NULL;
 414
 415   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 416     {
 417       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 418                 (int) symbol_len, symbol);
 419       return 1;
 420     }
 421
 422   if (repertoire != NULL
 423       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 424           == 0))
 425     {
 426       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 427                 (int) symbol_len, symbol);
 428       return 1;
 429     }
 430
 431   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 432     {
 433       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 434                 (int) symbol_len, symbol);
 435       return 1;
 436     }
 437
 438   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 439     {
 440       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 441                 (int) symbol_len, symbol);
 442       return 1;
 443     }
 444
 445   return 0;
 446 }
 447
 448
 449 /* Read the direction specification.  */
 450 static void
 451 read_directions (struct linereader *ldfile, struct token *arg,
 452                  const struct charmap_t *charmap,
 453                  struct repertoire_t *repertoire, struct localedef_t *result)
 454 {
 455   int cnt = 0;
 456   int max = nrules ?: 10;
 457   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 458   int warned = 0;
 459   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 460
 461   while (1)
 462     {
 463       int valid = 0;
 464
 465       if (arg->tok == tok_forward)
 466         {
 467           if (rules[cnt] & sort_backward)
 468             {
 469               if (! warned)
 470                 {
 471                   lr_error (ldfile, _("\
 472 %s: `forward' and `backward' are mutually excluding each other"),
 473                             "LC_COLLATE");
 474                   warned = 1;
 475                 }
 476             }
 477           else if (rules[cnt] & sort_forward)
 478             {
 479               if (! warned)
 480                 {
 481                   lr_error (ldfile, _("\
 482 %s: `%s' mentioned more than once in definition of weight %d"),
 483                             "LC_COLLATE", "forward", cnt + 1);
 484                 }
 485             }
 486           else
 487             rules[cnt] |= sort_forward;
 488
 489           valid = 1;
 490         }
 491       else if (arg->tok == tok_backward)
 492         {
 493           if (rules[cnt] & sort_forward)
 494             {
 495               if (! warned)
 496                 {
 497                   lr_error (ldfile, _("\
 498 %s: `forward' and `backward' are mutually excluding each other"),
 499                             "LC_COLLATE");
 500                   warned = 1;
 501                 }
 502             }
 503           else if (rules[cnt] & sort_backward)
 504             {
 505               if (! warned)
 506                 {
 507                   lr_error (ldfile, _("\
 508 %s: `%s' mentioned more than once in definition of weight %d"),
 509                             "LC_COLLATE", "backward", cnt + 1);
 510                 }
 511             }
 512           else
 513             rules[cnt] |= sort_backward;
 514
 515           valid = 1;
 516         }
 517       else if (arg->tok == tok_position)
 518         {
 519           if (rules[cnt] & sort_position)
 520             {
 521               if (! warned)
 522                 {
 523                   lr_error (ldfile, _("\
 524 %s: `%s' mentioned more than once in definition of weight %d"),
 525                             "LC_COLLATE", "position", cnt + 1);
 526                 }
 527             }
 528           else
 529             rules[cnt] |= sort_position;
 530
 531           valid = 1;
 532         }
 533
 534       if (valid)
 535         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 536
 537       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 538           || arg->tok == tok_semicolon)
 539         {
 540           if (! valid && ! warned)
 541             {
 542               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 543               warned = 1;
 544             }
 545
 546           /* See whether we have to increment the counter.  */
 547           if (arg->tok != tok_comma && rules[cnt] != 0)
 548             {
 549               /* Add the default `forward' if we have seen only `position'.  */
 550               if (rules[cnt] == sort_position)
 551                 rules[cnt] = sort_position | sort_forward;
 552
 553               ++cnt;
 554             }
 555
 556           if (arg->tok == tok_eof || arg->tok == tok_eol)
 557             /* End of line or file, so we exit the loop.  */
 558             break;
 559
 560           if (nrules == 0)
 561             {
 562               /* See whether we have enough room in the array.  */
 563               if (cnt == max)
 564                 {
 565                   max += 10;
 566                   rules = (enum coll_sort_rule *) xrealloc (rules,
 567                                                             max
 568                                                             * sizeof (*rules));
 569                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 570                 }
 571             }
 572           else
 573             {
 574               if (cnt == nrules)
 575                 {
 576                   /* There must not be any more rule.  */
 577                   if (! warned)
 578                     {
 579                       lr_error (ldfile, _("\
 580 %s: too many rules; first entry only had %d"),
 581                                 "LC_COLLATE", nrules);
 582                       warned = 1;
 583                     }
 584
 585                   lr_ignore_rest (ldfile, 0);
 586                   break;
 587                 }
 588             }
 589         }
 590       else
 591         {
 592           if (! warned)
 593             {
 594               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 595               warned = 1;
 596             }
 597         }
 598
 599       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 600     }
 601
 602   if (nrules == 0)
 603     {
 604       /* Now we know how many rules we have.  */
 605       nrules = cnt;
 606       rules = (enum coll_sort_rule *) xrealloc (rules,
 607                                                 nrules * sizeof (*rules));
 608     }
 609   else
 610     {
 611       if (cnt < nrules)
 612         {
 613           /* Not enough rules in this specification.  */
 614           if (! warned)
 615             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 616
 617           do
 618             rules[cnt] = sort_forward;
 619           while (++cnt < nrules);
 620         }
 621     }
 622
 623   collate->current_section->rules = rules;
 624 }
 625
 626
 627 static struct element_t *
 628 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 629               const char *str, size_t len)
 630 {
 631   void *result = NULL;
 632
 633   /* Search for the entries among the collation sequences already define.  */
 634   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 635     {
 636       /* Nope, not define yet.  So we see whether it is a
 637          collation symbol.  */
 638       void *ptr;
 639
 640       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 641         {
 642           /* It's a collation symbol.  */
 643           struct symbol_t *sym = (struct symbol_t *) ptr;
 644           result = sym->order;
 645
 646           if (result == NULL)
 647             result = sym->order = new_element (collate, NULL, 0, NULL,
 648                                                NULL, 0, 0);
 649         }
 650       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 651         {
 652           /* It's also no collation element.  So it is a character
 653              element defined later.  */
 654           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 655           /* Insert it into the sequence table.  */
 656           insert_entry (&collate->seq_table, str, len, result);
 657         }
 658     }
 659
 660   return (struct element_t *) result;
 661 }
 662
 663
 664 static void
 665 unlink_element (struct locale_collate_t *collate)
 666 {
 667   if (collate->cursor == collate->start)
 668     {
 669       assert (collate->cursor->next == NULL);
 670       assert (collate->cursor->last == NULL);
 671       collate->cursor = NULL;
 672     }
 673   else
 674     {
 675       if (collate->cursor->next != NULL)
 676         collate->cursor->next->last = collate->cursor->last;
 677       if (collate->cursor->last != NULL)
 678         collate->cursor->last->next = collate->cursor->next;
 679       collate->cursor = collate->cursor->last;
 680     }
 681 }
 682
 683
 684 static void
 685 insert_weights (struct linereader *ldfile, struct element_t *elem,
 686                 const struct charmap_t *charmap,
 687                 struct repertoire_t *repertoire, struct localedef_t *result,
 688                 enum token_t ellipsis)
 689 {
 690   int weight_cnt;
 691   struct token *arg;
 692   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 693
 694   /* Initialize all the fields.  */
 695   elem->file = ldfile->fname;
 696   elem->line = ldfile->lineno;
 697
 698   elem->last = collate->cursor;
 699   elem->next = collate->cursor ? collate->cursor->next : NULL;
 700   if (collate->cursor != NULL && collate->cursor->next != NULL)
 701     collate->cursor->next->last = elem;
 702   if (collate->cursor != NULL)
 703     collate->cursor->next = elem;
 704   if (collate->start == NULL)
 705     {
 706       assert (collate->cursor == NULL);
 707       collate->start = elem;
 708     }
 709
 710   elem->section = collate->current_section;
 711
 712   if (collate->current_section->first == NULL)
 713     collate->current_section->first = elem;
 714   if (collate->current_section->last == collate->cursor)
 715     collate->current_section->last = elem;
 716
 717   collate->cursor = elem;
 718
 719   elem->weights = (struct element_list_t *)
 720     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 721   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 722
 723   weight_cnt = 0;
 724
 725   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 726   do
 727     {
 728       if (arg->tok == tok_eof || arg->tok == tok_eol)
 729         break;
 730
 731       if (arg->tok == tok_ignore)
 732         {
 733           /* The weight for this level has to be ignored.  We use the
 734              null pointer to indicate this.  */
 735           elem->weights[weight_cnt].w = (struct element_t **)
 736             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 737           elem->weights[weight_cnt].w[0] = NULL;
 738           elem->weights[weight_cnt].cnt = 1;
 739         }
 740       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 741         {
 742           char ucs4str[10];
 743           struct element_t *val;
 744           char *symstr;
 745           size_t symlen;
 746
 747           if (arg->tok == tok_bsymbol)
 748             {
 749               symstr = arg->val.str.startmb;
 750               symlen = arg->val.str.lenmb;
 751             }
 752           else
 753             {
 754               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 755               symstr = ucs4str;
 756               symlen = 9;
 757             }
 758
 759           val = find_element (ldfile, collate, symstr, symlen);
 760           if (val == NULL)
 761             break;
 762
 763           elem->weights[weight_cnt].w = (struct element_t **)
 764             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 765           elem->weights[weight_cnt].w[0] = val;
 766           elem->weights[weight_cnt].cnt = 1;
 767         }
 768       else if (arg->tok == tok_string)
 769         {
 770           /* Split the string up in the individual characters and put
 771              the element definitions in the list.  */
 772           const char *cp = arg->val.str.startmb;
 773           int cnt = 0;
 774           struct element_t *charelem;
 775           struct element_t **weights = NULL;
 776           int max = 0;
 777
 778           if (*cp == '\0')
 779             {
 780               lr_error (ldfile, _("%s: empty weight string not allowed"),
 781                         "LC_COLLATE");
 782               lr_ignore_rest (ldfile, 0);
 783               break;
 784             }
 785
 786           do
 787             {
 788               if (*cp == '<')
 789                 {
 790                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 791                      the latter we have to unify the name.  */
 792                   const char *startp = ++cp;
 793                   size_t len;
 794
 795                   while (*cp != '>')
 796                     {
 797                       if (*cp == ldfile->escape_char)
 798                         ++cp;
 799                       if (*cp == '\0')
 800                         /* It's a syntax error.  */
 801                         goto syntax;
 802
 803                       ++cp;
 804                     }
 805
 806                   if (cp - startp == 5 && startp[0] == 'U'
 807                       && isxdigit (startp[1]) && isxdigit (startp[2])
 808                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 809                     {
 810                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 811                       char *newstr;
 812
 813                       newstr = (char *) xmalloc (10);
 814                       snprintf (newstr, 10, "U%08X", ucs4);
 815                       startp = newstr;
 816
 817                       len = 9;
 818                     }
 819                   else
 820                     len = cp - startp;
 821
 822                   charelem = find_element (ldfile, collate, startp, len);
 823                   ++cp;
 824                 }
 825               else
 826                 {
 827                   /* People really shouldn't use characters directly in
 828                      the string.  Especially since it's not really clear
 829                      what this means.  We interpret all characters in the
 830                      string as if that would be bsymbols.  Otherwise we
 831                      would have to match back to bsymbols somehow and this
 832                      is normally not what people normally expect.  */
 833                   charelem = find_element (ldfile, collate, cp++, 1);
 834                 }
 835
 836               if (charelem == NULL)
 837                 {
 838                   /* We ignore the rest of the line.  */
 839                   lr_ignore_rest (ldfile, 0);
 840                   break;
 841                 }
 842
 843               /* Add the pointer.  */
 844               if (cnt >= max)
 845                 {
 846                   struct element_t **newp;
 847                   max += 10;
 848                   newp = (struct element_t **)
 849                     alloca (max * sizeof (struct element_t *));
 850                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 851                   weights = newp;
 852                 }
 853               weights[cnt++] = charelem;
 854             }
 855           while (*cp != '\0');
 856
 857           /* Now store the information.  */
 858           elem->weights[weight_cnt].w = (struct element_t **)
 859             obstack_alloc (&collate->mempool,
 860                            cnt * sizeof (struct element_t *));
 861           memcpy (elem->weights[weight_cnt].w, weights,
 862                   cnt * sizeof (struct element_t *));
 863           elem->weights[weight_cnt].cnt = cnt;
 864
 865           /* We don't need the string anymore.  */
 866           free (arg->val.str.startmb);
 867         }
 868       else if (ellipsis != tok_none
 869                && (arg->tok == tok_ellipsis2
 870                    || arg->tok == tok_ellipsis3
 871                    || arg->tok == tok_ellipsis4))
 872         {
 873           /* It must be the same ellipsis as used in the initial column.  */
 874           if (arg->tok != ellipsis)
 875             lr_error (ldfile, _("\
 876 %s: weights must use the same ellipsis symbol as the name"),
 877                       "LC_COLLATE");
 878
 879           /* The weight for this level will depend on the element
 880              iterating over the range.  Put a placeholder.  */
 881           elem->weights[weight_cnt].w = (struct element_t **)
 882             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 883           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 884           elem->weights[weight_cnt].cnt = 1;
 885         }
 886       else
 887         {
 888         syntax:
 889           /* It's a syntax error.  */
 890           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 891           lr_ignore_rest (ldfile, 0);
 892           break;
 893         }
 894
 895       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 896       /* This better should be the end of the line or a semicolon.  */
 897       if (arg->tok == tok_semicolon)
 898         /* OK, ignore this and read the next token.  */
 899         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 900       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 901         {
 902           /* It's a syntax error.  */
 903           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 904           lr_ignore_rest (ldfile, 0);
 905           break;
 906         }
 907     }
 908   while (++weight_cnt < nrules);
 909
 910   if (weight_cnt < nrules)
 911     {
 912       /* This means the rest of the line uses the current element as
 913          the weight.  */
 914       do
 915         {
 916           elem->weights[weight_cnt].w = (struct element_t **)
 917             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 918           if (ellipsis == tok_none)
 919             elem->weights[weight_cnt].w[0] = elem;
 920           else
 921             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 922           elem->weights[weight_cnt].cnt = 1;
 923         }
 924       while (++weight_cnt < nrules);
 925     }
 926   else
 927     {
 928       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 929         {
 930           /* Too many rule values.  */
 931           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 932           lr_ignore_rest (ldfile, 0);
 933         }
 934       else
 935         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 936     }
 937 }
 938
 939
 940 static int
 941 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 942               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 943               struct localedef_t *result)
 944 {
 945   /* First find out what kind of symbol this is.  */
 946   struct charseq *seq;
 947   uint32_t wc;
 948   struct element_t *elem = NULL;
 949   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 950
 951   /* Try to find the character in the charmap.  */
 952   seq = charmap_find_value (charmap, symstr, symlen);
 953
 954   /* Determine the wide character.  */
 955   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 956     {
 957       wc = repertoire_find_value (repertoire, symstr, symlen);
 958       if (seq != NULL)
 959         seq->ucs4 = wc;
 960     }
 961   else
 962     wc = seq->ucs4;
 963
 964   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 965     {
 966       /* It's no character, so look through the collation elements and
 967          symbol list.  */
 968       void *ptr = elem;
 969       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 970         {
 971           void *result;
 972           struct symbol_t *sym = NULL;
 973
 974           /* It's also collation element.  Therefore it's either a
 975              collating symbol or it's a character which is not
 976              supported by the character set.  In the later case we
 977              simply create a dummy entry.  */
 978           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 979             {
 980               /* It's a collation symbol.  */
 981               sym = (struct symbol_t *) result;
 982
 983               elem = sym->order;
 984             }
 985
 986           if (elem == NULL)
 987             {
 988               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 989
 990               if (sym != NULL)
 991                 sym->order = elem;
 992               else
 993                 /* Enter a fake element in the sequence table.  This
 994                    won't cause anything in the output since there is
 995                    no multibyte or wide character associated with
 996                    it.  */
 997                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 998             }
 999         }
1000       else
1001         /* Copy the result back.  */
1002         elem = ptr;
1003     }
1004   else
1005     {
1006       /* Otherwise the symbols stands for a character.  */
1007       void *ptr = elem;
1008       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1009         {
1010           uint32_t wcs[2] = { wc, 0 };
1011
1012           /* We have to allocate an entry.  */
1013           elem = new_element (collate,
1014                               seq != NULL ? (char *) seq->bytes : NULL,
1015                               seq != NULL ? seq->nbytes : 0,
1016                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1017                               symstr, symlen, 1);
1018
1019           /* And add it to the table.  */
1020           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1021             /* This cannot happen.  */
1022             assert (! "Internal error");
1023         }
1024       else
1025         {
1026           /* Copy the result back.  */
1027           elem = ptr;
1028
1029           /* Maybe the character was used before the definition.  In this case
1030              we have to insert the byte sequences now.  */
1031           if (elem->mbs == NULL && seq != NULL)
1032             {
1033               elem->mbs = obstack_copy0 (&collate->mempool,
1034                                          seq->bytes, seq->nbytes);
1035               elem->nmbs = seq->nbytes;
1036             }
1037
1038           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1039             {
1040               uint32_t wcs[2] = { wc, 0 };
1041
1042               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1043               elem->nwcs = 1;
1044             }
1045         }
1046     }
1047
1048   /* Test whether this element is not already in the list.  */
1049   if (elem->next != NULL || elem == collate->cursor)
1050     {
1051       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1052                 (int) symlen, symstr, elem->file, elem->line);
1053       lr_ignore_rest (ldfile, 0);
1054       return 1;
1055     }
1056
1057   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1058
1059   return 0;
1060 }
1061
1062
1063 static void
1064 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1065                  enum token_t ellipsis, const struct charmap_t *charmap,
1066                  struct repertoire_t *repertoire,
1067                  struct localedef_t *result)
1068 {
1069   struct element_t *startp;
1070   struct element_t *endp;
1071   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1072
1073   /* Unlink the entry added for the ellipsis.  */
1074   unlink_element (collate);
1075   startp = collate->cursor;
1076
1077   /* Process and add the end-entry.  */
1078   if (symstr != NULL
1079       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1080     /* Something went wrong with inserting the to-value.  This means
1081        we cannot process the ellipsis.  */
1082     return;
1083
1084   /* Reset the cursor.  */
1085   collate->cursor = startp;
1086
1087   /* Now we have to handle many different situations:
1088      - we have to distinguish between the three different ellipsis forms
1089      - the is the ellipsis at the beginning, in the middle, or at the end.
1090   */
1091   endp = collate->cursor->next;
1092   assert (symstr == NULL || endp != NULL);
1093
1094   /* XXX The following is probably very wrong since also collating symbols
1095      can appear in ranges.  But do we want/can refine the test for that?  */
1096 #if 0
1097   /* Both, the start and the end symbol, must stand for characters.  */
1098   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1099       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1100     {
1101       lr_error (ldfile, _("\
1102 %s: the start and the end symbol of a range must stand for characters"),
1103                 "LC_COLLATE");
1104       return;
1105     }
1106 #endif
1107
1108   if (ellipsis == tok_ellipsis3)
1109     {
1110       /* One requirement we make here: the length of the byte
1111          sequences for the first and end character must be the same.
1112          This is mainly to prevent unwanted effects and this is often
1113          not what is wanted.  */
1114       size_t len = (startp->mbs != NULL ? startp->nmbs
1115                     : (endp->mbs != NULL ? endp->nmbs : 0));
1116       char mbcnt[len + 1];
1117       char mbend[len + 1];
1118
1119       /* Well, this should be caught somewhere else already.  Just to
1120          make sure.  */
1121       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1122       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1123
1124       if (startp != NULL && endp != NULL
1125           && startp->mbs != NULL && endp->mbs != NULL
1126           && startp->nmbs != endp->nmbs)
1127         {
1128           lr_error (ldfile, _("\
1129 %s: byte sequences of first and last character must have the same length"),
1130                     "LC_COLLATE");
1131           return;
1132         }
1133
1134       /* Determine whether we have to generate multibyte sequences.  */
1135       if ((startp == NULL || startp->mbs != NULL)
1136           && (endp == NULL || endp->mbs != NULL))
1137         {
1138           int cnt;
1139           int ret;
1140
1141           /* Prepare the beginning byte sequence.  This is either from the
1142              beginning byte sequence or it is all nulls if it was an
1143              initial ellipsis.  */
1144           if (startp == NULL || startp->mbs == NULL)
1145             memset (mbcnt, '\0', len);
1146           else
1147             {
1148               memcpy (mbcnt, startp->mbs, len);
1149
1150               /* And increment it so that the value is the first one we will
1151                  try to insert.  */
1152               for (cnt = len - 1; cnt >= 0; --cnt)
1153                 if (++mbcnt[cnt] != '\0')
1154                   break;
1155             }
1156           mbcnt[len] = '\0';
1157
1158           /* And the end sequence.  */
1159           if (endp == NULL || endp->mbs == NULL)
1160             memset (mbend, '\0', len);
1161           else
1162             memcpy (mbend, endp->mbs, len);
1163           mbend[len] = '\0';
1164
1165           /* Test whether we have a correct range.  */
1166           ret = memcmp (mbcnt, mbend, len);
1167           if (ret >= 0)
1168             {
1169               if (ret > 0)
1170                 lr_error (ldfile, _("%s: byte sequence of first character of \
1171 range is not lower than that of the last character"), "LC_COLLATE");
1172               return;
1173             }
1174
1175           /* Generate the byte sequences data.  */
1176           while (1)
1177             {
1178               struct charseq *seq;
1179
1180               /* Quite a bit of work ahead.  We have to find the character
1181                  definition for the byte sequence and then determine the
1182                  wide character belonging to it.  */
1183               seq = charmap_find_symbol (charmap, mbcnt, len);
1184               if (seq != NULL)
1185                 {
1186                   struct element_t *elem;
1187                   size_t namelen;
1188
1189                   /* I don't think this can ever happen.  */
1190                   assert (seq->name != NULL);
1191                   namelen = strlen (seq->name);
1192
1193                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1194                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1195                                                        namelen);
1196
1197                   /* Now we are ready to insert the new value in the
1198                      sequence.  Find out whether the element is
1199                      already known.  */
1200                   void *ptr;
1201                   if (find_entry (&collate->seq_table, seq->name, namelen,
1202                                   &ptr) != 0)
1203                     {
1204                       uint32_t wcs[2] = { seq->ucs4, 0 };
1205
1206                       /* We have to allocate an entry.  */
1207                       elem = new_element (collate, mbcnt, len,
1208                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1209                                           ? NULL : wcs, seq->name,
1210                                           namelen, 1);
1211
1212                       /* And add it to the table.  */
1213                       if (insert_entry (&collate->seq_table, seq->name,
1214                                         namelen, elem) != 0)
1215                         /* This cannot happen.  */
1216                         assert (! "Internal error");
1217                     }
1218                   else
1219                     /* Copy the result.  */
1220                     elem = ptr;
1221
1222                   /* Test whether this element is not already in the list.  */
1223                   if (elem->next != NULL || (collate->cursor != NULL
1224                                              && elem->next == collate->cursor))
1225                     {
1226                       lr_error (ldfile, _("\
1227 order for `%.*s' already defined at %s:%Zu"),
1228                                 (int) namelen, seq->name,
1229                                 elem->file, elem->line);
1230                       goto increment;
1231                     }
1232
1233                   /* Enqueue the new element.  */
1234                   elem->last = collate->cursor;
1235                   if (collate->cursor == NULL)
1236                     elem->next = NULL;
1237                   else
1238                     {
1239                       elem->next = collate->cursor->next;
1240                       elem->last->next = elem;
1241                       if (elem->next != NULL)
1242                         elem->next->last = elem;
1243                     }
1244                   if (collate->start == NULL)
1245                     {
1246                       assert (collate->cursor == NULL);
1247                       collate->start = elem;
1248                     }
1249                   collate->cursor = elem;
1250
1251                  /* Add the weight value.  We take them from the
1252                     `ellipsis_weights' member of `collate'.  */
1253                   elem->weights = (struct element_list_t *)
1254                     obstack_alloc (&collate->mempool,
1255                                    nrules * sizeof (struct element_list_t));
1256                   for (cnt = 0; cnt < nrules; ++cnt)
1257                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1258                         && (collate->ellipsis_weight.weights[cnt].w[0]
1259                             == ELEMENT_ELLIPSIS2))
1260                       {
1261                         elem->weights[cnt].w = (struct element_t **)
1262                           obstack_alloc (&collate->mempool,
1263                                          sizeof (struct element_t *));
1264                         elem->weights[cnt].w[0] = elem;
1265                         elem->weights[cnt].cnt = 1;
1266                       }
1267                     else
1268                       {
1269                         /* Simply use the weight from `ellipsis_weight'.  */
1270                         elem->weights[cnt].w =
1271                           collate->ellipsis_weight.weights[cnt].w;
1272                         elem->weights[cnt].cnt =
1273                           collate->ellipsis_weight.weights[cnt].cnt;
1274                       }
1275                 }
1276
1277               /* Increment for the next round.  */
1278             increment:
1279               for (cnt = len - 1; cnt >= 0; --cnt)
1280                 if (++mbcnt[cnt] != '\0')
1281                   break;
1282
1283               /* Find out whether this was all.  */
1284               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1285                 /* Yep, that's all.  */
1286                 break;
1287             }
1288         }
1289     }
1290   else
1291     {
1292       /* For symbolic range we naturally must have a beginning and an
1293          end specified by the user.  */
1294       if (startp == NULL)
1295         lr_error (ldfile, _("\
1296 %s: symbolic range ellipsis must not directly follow `order_start'"),
1297                   "LC_COLLATE");
1298       else if (endp == NULL)
1299         lr_error (ldfile, _("\
1300 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1301                   "LC_COLLATE");
1302       else
1303         {
1304           /* Determine the range.  To do so we have to determine the
1305              common prefix of the both names and then the numeric
1306              values of both ends.  */
1307           size_t lenfrom = strlen (startp->name);
1308           size_t lento = strlen (endp->name);
1309           char buf[lento + 1];
1310           int preflen = 0;
1311           long int from;
1312           long int to;
1313           char *cp;
1314           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1315
1316           if (lenfrom != lento)
1317             {
1318             invalid_range:
1319               lr_error (ldfile, _("\
1320 `%s' and `%.*s' are not valid names for symbolic range"),
1321                         startp->name, (int) lento, endp->name);
1322               return;
1323             }
1324
1325           while (startp->name[preflen] == endp->name[preflen])
1326             if (startp->name[preflen] == '\0')
1327               /* Nothing to be done.  The start and end point are identical
1328                  and while inserting the end point we have already given
1329                  the user an error message.  */
1330               return;
1331             else
1332               ++preflen;
1333
1334           errno = 0;
1335           from = strtol (startp->name + preflen, &cp, base);
1336           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1337             goto invalid_range;
1338
1339           errno = 0;
1340           to = strtol (endp->name + preflen, &cp, base);
1341           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1342             goto invalid_range;
1343
1344           /* Copy the prefix.  */
1345           memcpy (buf, startp->name, preflen);
1346
1347           /* Loop over all values.  */
1348           for (++from; from < to; ++from)
1349             {
1350               struct element_t *elem = NULL;
1351               struct charseq *seq;
1352               uint32_t wc;
1353               int cnt;
1354
1355               /* Generate the name.  */
1356               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1357                        (int) (lenfrom - preflen), from);
1358
1359               /* Look whether this name is already defined.  */
1360               void *ptr;
1361               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1362                 {
1363                   /* Copy back the result.  */
1364                   elem = ptr;
1365
1366                   if (elem->next != NULL || (collate->cursor != NULL
1367                                              && elem->next == collate->cursor))
1368                     {
1369                       lr_error (ldfile, _("\
1370 %s: order for `%.*s' already defined at %s:%Zu"),
1371                                 "LC_COLLATE", (int) lenfrom, buf,
1372                                 elem->file, elem->line);
1373                       continue;
1374                     }
1375
1376                   if (elem->name == NULL)
1377                     {
1378                       lr_error (ldfile, _("%s: `%s' must be a character"),
1379                                 "LC_COLLATE", buf);
1380                       continue;
1381                     }
1382                 }
1383
1384               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1385                 {
1386                   /* Search for a character of this name.  */
1387                   seq = charmap_find_value (charmap, buf, lenfrom);
1388                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1389                     {
1390                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1391
1392                       if (seq != NULL)
1393                         seq->ucs4 = wc;
1394                     }
1395                   else
1396                     wc = seq->ucs4;
1397
1398                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1399                     /* We don't know anything about a character with this
1400                        name.  XXX Should we warn?  */
1401                     continue;
1402
1403                   if (elem == NULL)
1404                     {
1405                       uint32_t wcs[2] = { wc, 0 };
1406
1407                       /* We have to allocate an entry.  */
1408                       elem = new_element (collate,
1409                                           seq != NULL
1410                                           ? (char *) seq->bytes : NULL,
1411                                           seq != NULL ? seq->nbytes : 0,
1412                                           wc == ILLEGAL_CHAR_VALUE
1413                                           ? NULL : wcs, buf, lenfrom, 1);
1414                     }
1415                   else
1416                     {
1417                       /* Update the element.  */
1418                       if (seq != NULL)
1419                         {
1420                           elem->mbs = obstack_copy0 (&collate->mempool,
1421                                                      seq->bytes, seq->nbytes);
1422                           elem->nmbs = seq->nbytes;
1423                         }
1424
1425                       if (wc != ILLEGAL_CHAR_VALUE)
1426                         {
1427                           uint32_t zero = 0;
1428
1429                           obstack_grow (&collate->mempool,
1430                                         &wc, sizeof (uint32_t));
1431                           obstack_grow (&collate->mempool,
1432                                         &zero, sizeof (uint32_t));
1433                           elem->wcs = obstack_finish (&collate->mempool);
1434                           elem->nwcs = 1;
1435                         }
1436                     }
1437
1438                   elem->file = ldfile->fname;
1439                   elem->line = ldfile->lineno;
1440                   elem->section = collate->current_section;
1441                 }
1442
1443               /* Enqueue the new element.  */
1444               elem->last = collate->cursor;
1445               elem->next = collate->cursor->next;
1446               elem->last->next = elem;
1447               if (elem->next != NULL)
1448                 elem->next->last = elem;
1449               collate->cursor = elem;
1450
1451               /* Now add the weights.  They come from the `ellipsis_weights'
1452                  member of `collate'.  */
1453               elem->weights = (struct element_list_t *)
1454                 obstack_alloc (&collate->mempool,
1455                                nrules * sizeof (struct element_list_t));
1456               for (cnt = 0; cnt < nrules; ++cnt)
1457                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1458                     && (collate->ellipsis_weight.weights[cnt].w[0]
1459                         == ELEMENT_ELLIPSIS2))
1460                   {
1461                     elem->weights[cnt].w = (struct element_t **)
1462                       obstack_alloc (&collate->mempool,
1463                                      sizeof (struct element_t *));
1464                     elem->weights[cnt].w[0] = elem;
1465                     elem->weights[cnt].cnt = 1;
1466                   }
1467                 else
1468                   {
1469                     /* Simly use the weight from `ellipsis_weight'.  */
1470                     elem->weights[cnt].w =
1471                       collate->ellipsis_weight.weights[cnt].w;
1472                     elem->weights[cnt].cnt =
1473                       collate->ellipsis_weight.weights[cnt].cnt;
1474                   }
1475             }
1476         }
1477     }
1478 }
1479
1480
1481 static void
1482 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1483                  struct localedef_t *copy_locale, int ignore_content)
1484 {
1485   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1486     {
1487       struct locale_collate_t *collate;
1488
1489       if (copy_locale == NULL)
1490         {
1491           collate = locale->categories[LC_COLLATE].collate =
1492             (struct locale_collate_t *)
1493             xcalloc (1, sizeof (struct locale_collate_t));
1494
1495           /* Init the various data structures.  */
1496           init_hash (&collate->elem_table, 100);
1497           init_hash (&collate->sym_table, 100);
1498           init_hash (&collate->seq_table, 500);
1499           obstack_init (&collate->mempool);
1500
1501           collate->col_weight_max = -1;
1502         }
1503       else
1504         /* Reuse the copy_locale's data structures.  */
1505         collate = locale->categories[LC_COLLATE].collate =
1506           copy_locale->categories[LC_COLLATE].collate;
1507     }
1508
1509   ldfile->translate_strings = 0;
1510   ldfile->return_widestr = 0;
1511 }
1512
1513
1514 void
1515 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1516 {
1517   /* Now is the time when we can assign the individual collation
1518      values for all the symbols.  We have possibly different values
1519      for the wide- and the multibyte-character symbols.  This is done
1520      since it might make a difference in the encoding if there is in
1521      some cases no multibyte-character but there are wide-characters.
1522      (The other way around it is not important since theencoded
1523      collation value in the wide-character case is 32 bits wide and
1524      therefore requires no encoding).
1525
1526      The lowest collation value assigned is 2.  Zero is reserved for
1527      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1528      functions and 1 is used to separate the individual passes for the
1529      different rules.
1530
1531      We also have to construct is list with all the bytes/words which
1532      can come first in a sequence, followed by all the elements which
1533      also start with this byte/word.  The order is reverse which has
1534      among others the important effect that longer strings are located
1535      first in the list.  This is required for the output data since
1536      the algorithm used in `strcoll' etc depends on this.
1537
1538      The multibyte case is easy.  We simply sort into an array with
1539      256 elements.  */
1540   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1541   int mbact[nrules];
1542   int wcact;
1543   int mbseqact;
1544   int wcseqact;
1545   struct element_t *runp;
1546   int i;
1547   int need_undefined = 0;
1548   struct section_list *sect;
1549   int ruleidx;
1550   int nr_wide_elems = 0;
1551
1552   if (collate == NULL)
1553     {
1554       /* No data, no check.  */
1555       if (! be_quiet)
1556         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1557                                 "LC_COLLATE"));
1558       return;
1559     }
1560
1561   /* If this assertion is hit change the type in `element_t'.  */
1562   assert (nrules <= sizeof (runp->used_in_level) * 8);
1563
1564   /* Make sure that the `position' rule is used either in all sections
1565      or in none.  */
1566   for (i = 0; i < nrules; ++i)
1567     for (sect = collate->sections; sect != NULL; sect = sect->next)
1568       if (sect != collate->current_section
1569           && sect->rules != NULL
1570           && ((sect->rules[i] & sort_position)
1571               != (collate->current_section->rules[i] & sort_position)))
1572         {
1573           WITH_CUR_LOCALE (error (0, 0, _("\
1574 %s: `position' must be used for a specific level in all sections or none"),
1575                                   "LC_COLLATE"));
1576           break;
1577         }
1578
1579   /* Find out which elements are used at which level.  At the same
1580      time we find out whether we have any undefined symbols.  */
1581   runp = collate->start;
1582   while (runp != NULL)
1583     {
1584       if (runp->mbs != NULL)
1585         {
1586           for (i = 0; i < nrules; ++i)
1587             {
1588               int j;
1589
1590               for (j = 0; j < runp->weights[i].cnt; ++j)
1591                 /* A NULL pointer as the weight means IGNORE.  */
1592                 if (runp->weights[i].w[j] != NULL)
1593                   {
1594                     if (runp->weights[i].w[j]->weights == NULL)
1595                       {
1596                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1597                                                         runp->line,
1598                                                         _("symbol `%s' not defined"),
1599                                                         runp->weights[i].w[j]->name));
1600
1601                         need_undefined = 1;
1602                         runp->weights[i].w[j] = &collate->undefined;
1603                       }
1604                     else
1605                       /* Set the bit for the level.  */
1606                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1607                   }
1608             }
1609         }
1610
1611       /* Up to the next entry.  */
1612       runp = runp->next;
1613     }
1614
1615   /* Walk through the list of defined sequences and assign weights.  Also
1616      create the data structure which will allow generating the single byte
1617      character based tables.
1618
1619      Since at each time only the weights for each of the rules are
1620      only compared to other weights for this rule it is possible to
1621      assign more compact weight values than simply counting all
1622      weights in sequence.  We can assign weights from 3, one for each
1623      rule individually and only for those elements, which are actually
1624      used for this rule.
1625
1626      Why is this important?  It is not for the wide char table.  But
1627      it is for the singlebyte output since here larger numbers have to
1628      be encoded to make it possible to emit the value as a byte
1629      string.  */
1630   for (i = 0; i < nrules; ++i)
1631     mbact[i] = 2;
1632   wcact = 2;
1633   mbseqact = 0;
1634   wcseqact = 0;
1635   runp = collate->start;
1636   while (runp != NULL)
1637     {
1638       /* Determine the order.  */
1639       if (runp->used_in_level != 0)
1640         {
1641           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1642                                                  nrules * sizeof (int));
1643
1644           for (i = 0; i < nrules; ++i)
1645             if ((runp->used_in_level & (1 << i)) != 0)
1646               runp->mborder[i] = mbact[i]++;
1647             else
1648               runp->mborder[i] = 0;
1649         }
1650
1651       if (runp->mbs != NULL)
1652         {
1653           struct element_t **eptr;
1654           struct element_t *lastp = NULL;
1655
1656           /* Find the point where to insert in the list.  */
1657           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1658           while (*eptr != NULL)
1659             {
1660               if ((*eptr)->nmbs < runp->nmbs)
1661                 break;
1662
1663               if ((*eptr)->nmbs == runp->nmbs)
1664                 {
1665                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1666
1667                   if (c == 0)
1668                     {
1669                       /* This should not happen.  It means that we have
1670                          to symbols with the same byte sequence.  It is
1671                          of course an error.  */
1672                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1673                                                       (*eptr)->line,
1674                                                       _("\
1675 symbol `%s' has the same encoding as"), (*eptr)->name);
1676                                        error_at_line (0, 0, runp->file,
1677                                                       runp->line,
1678                                                       _("symbol `%s'"),
1679                                                       runp->name));
1680                       goto dont_insert;
1681                     }
1682                   else if (c < 0)
1683                     /* Insert it here.  */
1684                     break;
1685                 }
1686
1687               /* To the next entry.  */
1688               lastp = *eptr;
1689               eptr = &(*eptr)->mbnext;
1690             }
1691
1692           /* Set the pointers.  */
1693           runp->mbnext = *eptr;
1694           runp->mblast = lastp;
1695           if (*eptr != NULL)
1696             (*eptr)->mblast = runp;
1697           *eptr = runp;
1698         dont_insert:
1699           ;
1700         }
1701
1702       if (runp->used_in_level)
1703         {
1704           runp->wcorder = wcact++;
1705
1706           /* We take the opportunity to count the elements which have
1707              wide characters.  */
1708           ++nr_wide_elems;
1709         }
1710
1711       if (runp->is_character)
1712         {
1713           if (runp->nmbs == 1)
1714             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1715
1716           runp->wcseqorder = wcseqact++;
1717         }
1718       else if (runp->mbs != NULL && runp->weights != NULL)
1719         /* This is for collation elements.  */
1720         runp->wcseqorder = wcseqact++;
1721
1722       /* Up to the next entry.  */
1723       runp = runp->next;
1724     }
1725
1726   /* Find out whether any of the `mbheads' entries is unset.  In this
1727      case we use the UNDEFINED entry.  */
1728   for (i = 1; i < 256; ++i)
1729     if (collate->mbheads[i] == NULL)
1730       {
1731         need_undefined = 1;
1732         collate->mbheads[i] = &collate->undefined;
1733       }
1734
1735   /* Now to the wide character case.  */
1736   collate->wcheads.p = 6;
1737   collate->wcheads.q = 10;
1738   wchead_table_init (&collate->wcheads);
1739
1740   collate->wcseqorder.p = 6;
1741   collate->wcseqorder.q = 10;
1742   collseq_table_init (&collate->wcseqorder);
1743
1744   /* Start adding.  */
1745   runp = collate->start;
1746   while (runp != NULL)
1747     {
1748       if (runp->wcs != NULL)
1749         {
1750           struct element_t *e;
1751           struct element_t **eptr;
1752           struct element_t *lastp;
1753
1754           /* Insert the collation sequence value.  */
1755           if (runp->is_character)
1756             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1757                                runp->wcseqorder);
1758
1759           /* Find the point where to insert in the list.  */
1760           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1761           eptr = &e;
1762           lastp = NULL;
1763           while (*eptr != NULL)
1764             {
1765               if ((*eptr)->nwcs < runp->nwcs)
1766                 break;
1767
1768               if ((*eptr)->nwcs == runp->nwcs)
1769                 {
1770                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1771                                    (wchar_t *) runp->wcs, runp->nwcs);
1772
1773                   if (c == 0)
1774                     {
1775                       /* This should not happen.  It means that we have
1776                          two symbols with the same byte sequence.  It is
1777                          of course an error.  */
1778                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1779                                                       (*eptr)->line,
1780                                                       _("\
1781 symbol `%s' has the same encoding as"), (*eptr)->name);
1782                                        error_at_line (0, 0, runp->file,
1783                                                       runp->line,
1784                                                       _("symbol `%s'"),
1785                                                       runp->name));
1786                       goto dont_insertwc;
1787                     }
1788                   else if (c < 0)
1789                     /* Insert it here.  */
1790                     break;
1791                 }
1792
1793               /* To the next entry.  */
1794               lastp = *eptr;
1795               eptr = &(*eptr)->wcnext;
1796             }
1797
1798           /* Set the pointers.  */
1799           runp->wcnext = *eptr;
1800           runp->wclast = lastp;
1801           if (*eptr != NULL)
1802             (*eptr)->wclast = runp;
1803           *eptr = runp;
1804           if (eptr == &e)
1805             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1806         dont_insertwc:
1807           ;
1808         }
1809
1810       /* Up to the next entry.  */
1811       runp = runp->next;
1812     }
1813
1814   collseq_table_finalize (&collate->wcseqorder);
1815
1816   /* Now determine whether the UNDEFINED entry is needed and if yes,
1817      whether it was defined.  */
1818   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1819   if (collate->undefined.file == NULL)
1820     {
1821       if (need_undefined)
1822         {
1823           /* This seems not to be enforced by recent standards.  Don't
1824              emit an error, simply append UNDEFINED at the end.  */
1825           if (0)
1826             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1827
1828           /* Add UNDEFINED at the end.  */
1829           collate->undefined.mborder =
1830             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1831
1832           for (i = 0; i < nrules; ++i)
1833             collate->undefined.mborder[i] = mbact[i]++;
1834         }
1835
1836       /* In any case we will need the definition for the wide character
1837          case.  But we will not complain that it is missing since the
1838          specification strangely enough does not seem to account for
1839          this.  */
1840       collate->undefined.wcorder = wcact++;
1841     }
1842
1843   /* Finally, try to unify the rules for the sections.  Whenever the rules
1844      for a section are the same as those for another section give the
1845      ruleset the same index.  Since there are never many section we can
1846      use an O(n^2) algorithm here.  */
1847   sect = collate->sections;
1848   while (sect != NULL && sect->rules == NULL)
1849     sect = sect->next;
1850
1851   /* Bail out if we have no sections because of earlier errors.  */
1852   if (sect == NULL)
1853     {
1854       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1855                               _("too many errors; giving up")));
1856       return;
1857     }
1858
1859   ruleidx = 0;
1860   do
1861     {
1862       struct section_list *osect = collate->sections;
1863
1864       while (osect != sect)
1865         if (osect->rules != NULL
1866             && memcmp (osect->rules, sect->rules,
1867                        nrules * sizeof (osect->rules[0])) == 0)
1868           break;
1869         else
1870           osect = osect->next;
1871
1872       if (osect == sect)
1873         sect->ruleidx = ruleidx++;
1874       else
1875         sect->ruleidx = osect->ruleidx;
1876
1877       /* Next section.  */
1878       do
1879         sect = sect->next;
1880       while (sect != NULL && sect->rules == NULL);
1881     }
1882   while (sect != NULL);
1883   /* We are currently not prepared for more than 128 rulesets.  But this
1884      should never really be a problem.  */
1885   assert (ruleidx <= 128);
1886 }
1887
1888
1889 static int32_t
1890 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1891                struct element_t *elem)
1892 {
1893   size_t cnt;
1894   int32_t retval;
1895
1896   /* Optimize the use of UNDEFINED.  */
1897   if (elem == &collate->undefined)
1898     /* The weights are already inserted.  */
1899     return 0;
1900
1901   /* This byte can start exactly one collation element and this is
1902      a single byte.  We can directly give the index to the weights.  */
1903   retval = obstack_object_size (pool);
1904
1905   /* Construct the weight.  */
1906   for (cnt = 0; cnt < nrules; ++cnt)
1907     {
1908       char buf[elem->weights[cnt].cnt * 7];
1909       int len = 0;
1910       int i;
1911
1912       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1913         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1914         if (elem->weights[cnt].w[i] != NULL)
1915           len += utf8_encode (&buf[len],
1916                               elem->weights[cnt].w[i]->mborder[cnt]);
1917
1918       /* And add the buffer content.  */
1919       obstack_1grow (pool, len);
1920       obstack_grow (pool, buf, len);
1921     }
1922
1923   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1924 }
1925
1926
1927 static int32_t
1928 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1929                  struct element_t *elem)
1930 {
1931   size_t cnt;
1932   int32_t retval;
1933
1934   /* Optimize the use of UNDEFINED.  */
1935   if (elem == &collate->undefined)
1936     /* The weights are already inserted.  */
1937     return 0;
1938
1939   /* This byte can start exactly one collation element and this is
1940      a single byte.  We can directly give the index to the weights.  */
1941   retval = obstack_object_size (pool) / sizeof (int32_t);
1942
1943   /* Construct the weight.  */
1944   for (cnt = 0; cnt < nrules; ++cnt)
1945     {
1946       int32_t buf[elem->weights[cnt].cnt];
1947       int i;
1948       int32_t j;
1949
1950       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1951         if (elem->weights[cnt].w[i] != NULL)
1952           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1953
1954       /* And add the buffer content.  */
1955       obstack_int32_grow (pool, j);
1956
1957       obstack_grow (pool, buf, j * sizeof (int32_t));
1958     }
1959
1960   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1961 }
1962
/* Context for `add_to_tablewc'.  That function is handed to
   `wchead_table_iterate' as a callback and only receives the character
   and the list head, so everything else it needs is passed through this
   file-scope object.  `collate_output' fills it in right before the
   iteration and clears it again afterwards.

   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to `output_weightwc' to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the sequence records for characters which start
     more than one sequence or a sequence longer than one character.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed series of
     consecutive sequences.  */
  struct obstack *indpool;
  /* The collation data being written; passed on to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table mapping a wide character to its weight index or to the
     negated position of its records in `extrapool'.  */
  struct collidx_table *tablewc;
} atwc;

/* Callback for `wchead_table_iterate'; defined right below.  */
static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1974
1975 static void
1976 add_to_tablewc (uint32_t ch, struct element_t *runp)
1977 {
1978   if (runp->wcnext == NULL && runp->nwcs == 1)
1979     {
1980       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1981                                            runp);
1982       collidx_table_add (atwc.tablewc, ch, weigthidx);
1983     }
1984   else
1985     {
1986       /* As for the singlebyte table, we recognize sequences and
1987          compress them.  */
1988       struct element_t *lastp;
1989
1990       collidx_table_add (atwc.tablewc, ch,
1991                          -(obstack_object_size (atwc.extrapool)
1992                          / sizeof (uint32_t)));
1993
1994       do
1995         {
1996           /* Store the current index in the weight table.  We know that
1997              the current position in the `extrapool' is aligned on a
1998              32-bit address.  */
1999           int32_t weightidx;
2000           int added;
2001
2002           /* Find out wether this is a single entry or we have more than
2003              one consecutive entry.  */
2004           if (runp->wcnext != NULL
2005               && runp->nwcs == runp->wcnext->nwcs
2006               && wmemcmp ((wchar_t *) runp->wcs,
2007                           (wchar_t *)runp->wcnext->wcs,
2008                           runp->nwcs - 1) == 0
2009               && (runp->wcs[runp->nwcs - 1]
2010                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2011             {
2012               int i;
2013               struct element_t *series_startp = runp;
2014               struct element_t *curp;
2015
2016               /* Now add first the initial byte sequence.  */
2017               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2018               if (sizeof (int32_t) == sizeof (int))
2019                 obstack_make_room (atwc.extrapool, added);
2020
2021               /* More than one consecutive entry.  We mark this by having
2022                  a negative index into the indirect table.  */
2023               obstack_int32_grow_fast (atwc.extrapool,
2024                                        -(obstack_object_size (atwc.indpool)
2025                                          / sizeof (int32_t)));
2026               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2027
2028               do
2029                 runp = runp->wcnext;
2030               while (runp->wcnext != NULL
2031                      && runp->nwcs == runp->wcnext->nwcs
2032                      && wmemcmp ((wchar_t *) runp->wcs,
2033                                  (wchar_t *)runp->wcnext->wcs,
2034                                  runp->nwcs - 1) == 0
2035                      && (runp->wcs[runp->nwcs - 1]
2036                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2037
2038               /* Now walk backward from here to the beginning.  */
2039               curp = runp;
2040
2041               for (i = 1; i < runp->nwcs; ++i)
2042                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2043
2044               /* Now find the end of the consecutive sequence and
2045                  add all the indeces in the indirect pool.  */
2046               do
2047                 {
2048                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2049                                                curp);
2050                   obstack_int32_grow (atwc.indpool, weightidx);
2051
2052                   curp = curp->wclast;
2053                 }
2054               while (curp != series_startp);
2055
2056               /* Add the final weight.  */
2057               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2058                                            curp);
2059               obstack_int32_grow (atwc.indpool, weightidx);
2060
2061               /* And add the end byte sequence.  Without length this
2062                  time.  */
2063               for (i = 1; i < curp->nwcs; ++i)
2064                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2065             }
2066           else
2067             {
2068               /* A single entry.  Simply add the index and the length and
2069                  string (except for the first character which is already
2070                  tested for).  */
2071               int i;
2072
2073               /* Output the weight info.  */
2074               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2075                                            runp);
2076
2077               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2078               if (sizeof (int) == sizeof (int32_t))
2079                 obstack_make_room (atwc.extrapool, added);
2080
2081               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2082               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2083               for (i = 1; i < runp->nwcs; ++i)
2084                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2085             }
2086
2087           /* Next entry.  */
2088           lastp = runp;
2089           runp = runp->wcnext;
2090         }
2091       while (runp != NULL);
2092     }
2093 }
2094
2095 void
2096 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2097                 const char *output_path)
2098 {
2099   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2100   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2101   struct iovec iov[2 + nelems];
2102   struct locale_file data;
2103   uint32_t idx[nelems];
2104   size_t cnt;
2105   size_t ch;
2106   int32_t tablemb[256];
2107   struct obstack weightpool;
2108   struct obstack extrapool;
2109   struct obstack indirectpool;
2110   struct section_list *sect;
2111   struct collidx_table tablewc;
2112   uint32_t elem_size;
2113   uint32_t *elem_table;
2114   int i;
2115   struct element_t *runp;
2116
2117   data.magic = LIMAGIC (LC_COLLATE);
2118   data.n = nelems;
2119   iov[0].iov_base = (void *) &data;
2120   iov[0].iov_len = sizeof (data);
2121
2122   iov[1].iov_base = (void *) idx;
2123   iov[1].iov_len = sizeof (idx);
2124
2125   idx[0] = iov[0].iov_len + iov[1].iov_len;
2126   cnt = 0;
2127
2128   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2129   iov[2 + cnt].iov_base = &nrules;
2130   iov[2 + cnt].iov_len = sizeof (uint32_t);
2131   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2132   ++cnt;
2133
2134   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2135   if (collate == NULL)
2136     {
2137       int32_t dummy = 0;
2138
2139       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2140         {
2141           /* The words have to be handled specially.  */
2142           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2143             {
2144               iov[2 + cnt].iov_base = &dummy;
2145               iov[2 + cnt].iov_len = sizeof (int32_t);
2146             }
2147           else
2148             {
2149               iov[2 + cnt].iov_base = NULL;
2150               iov[2 + cnt].iov_len = 0;
2151             }
2152
2153           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2154             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2155           ++cnt;
2156         }
2157
2158       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2159
2160       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2161
2162       return;
2163     }
2164
2165   obstack_init (&weightpool);
2166   obstack_init (&extrapool);
2167   obstack_init (&indirectpool);
2168
2169   /* Since we are using the sign of an integer to mark indirection the
2170      offsets in the arrays we are indirectly referring to must not be
2171      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2172   obstack_int32_grow (&extrapool, 0);
2173   obstack_int32_grow (&indirectpool, 0);
2174
2175   /* Prepare the ruleset table.  */
2176   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2177     if (sect->rules != NULL && sect->ruleidx == i)
2178       {
2179         int j;
2180
2181         obstack_make_room (&weightpool, nrules);
2182
2183         for (j = 0; j < nrules; ++j)
2184           obstack_1grow_fast (&weightpool, sect->rules[j]);
2185         ++i;
2186       }
2187   /* And align the output.  */
2188   i = (nrules * i) % __alignof__ (int32_t);
2189   if (i > 0)
2190     do
2191       obstack_1grow (&weightpool, '\0');
2192     while (++i < __alignof__ (int32_t));
2193
2194   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2195   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2196   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2197   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2198   ++cnt;
2199
2200   /* Generate the 8-bit table.  Walk through the lists of sequences
2201      starting with the same byte and add them one after the other to
2202      the table.  In case we have more than one sequence starting with
2203      the same byte we have to use extra indirection.
2204
2205      First add a record for the NUL byte.  This entry will never be used
2206      so it does not matter.  */
2207   tablemb[0] = 0;
2208
2209   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2210      will probably be used more than once it is good to store the
2211      weights only once.  */
2212   if (collate->undefined.used_in_level != 0)
2213     output_weight (&weightpool, collate, &collate->undefined);
2214
2215   for (ch = 1; ch < 256; ++ch)
2216     if (collate->mbheads[ch]->mbnext == NULL
2217         && collate->mbheads[ch]->nmbs <= 1)
2218       {
2219         tablemb[ch] = output_weight (&weightpool, collate,
2220                                      collate->mbheads[ch]);
2221       }
2222     else
2223       {
2224         /* The entries in the list are sorted by length and then
2225            alphabetically.  This is the order in which we will add the
2226            elements to the collation table.  This allows simply walking
2227            the table in sequence and stopping at the first matching
2228            entry.  Since the longer sequences are coming first in the
2229            list they have the possibility to match first, just as it
2230            has to be.  In the worst case we are walking to the end of
2231            the list where we put, if no singlebyte sequence is defined
2232            in the locale definition, the weights for UNDEFINED.
2233
2234            To reduce the length of the search list we compress them a bit.
2235            This happens by collecting sequences of consecutive byte
2236            sequences in one entry (having and begin and end byte sequence)
2237            and add only one index into the weight table.  We can find the
2238            consecutive entries since they are also consecutive in the list.  */
2239         struct element_t *runp = collate->mbheads[ch];
2240         struct element_t *lastp;
2241
2242         assert ((obstack_object_size (&extrapool)
2243                  & (__alignof__ (int32_t) - 1)) == 0);
2244
2245         tablemb[ch] = -obstack_object_size (&extrapool);
2246
2247         do
2248           {
2249             /* Store the current index in the weight table.  We know that
2250                the current position in the `extrapool' is aligned on a
2251                32-bit address.  */
2252             int32_t weightidx;
2253             int added;
2254
2255             /* Find out wether this is a single entry or we have more than
2256                one consecutive entry.  */
2257             if (runp->mbnext != NULL
2258                 && runp->nmbs == runp->mbnext->nmbs
2259                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2260                 && (runp->mbs[runp->nmbs - 1]
2261                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2262               {
2263                 int i;
2264                 struct element_t *series_startp = runp;
2265                 struct element_t *curp;
2266
2267                 /* Compute how much space we will need.  */
2268                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2269                           + __alignof__ (int32_t) - 1)
2270                          & ~(__alignof__ (int32_t) - 1));
2271                 assert ((obstack_object_size (&extrapool)
2272                          & (__alignof__ (int32_t) - 1)) == 0);
2273                 obstack_make_room (&extrapool, added);
2274
2275                 /* More than one consecutive entry.  We mark this by having
2276                    a negative index into the indirect table.  */
2277                 obstack_int32_grow_fast (&extrapool,
2278                                          -(obstack_object_size (&indirectpool)
2279                                            / sizeof (int32_t)));
2280
2281                 /* Now search first the end of the series.  */
2282                 do
2283                   runp = runp->mbnext;
2284                 while (runp->mbnext != NULL
2285                        && runp->nmbs == runp->mbnext->nmbs
2286                        && memcmp (runp->mbs, runp->mbnext->mbs,
2287                                   runp->nmbs - 1) == 0
2288                        && (runp->mbs[runp->nmbs - 1]
2289                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2290
2291                 /* Now walk backward from here to the beginning.  */
2292                 curp = runp;
2293
2294                 assert (runp->nmbs <= 256);
2295                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2296                 for (i = 1; i < curp->nmbs; ++i)
2297                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2298
2299                 /* Now find the end of the consecutive sequence and
2300                    add all the indeces in the indirect pool.  */
2301                 do
2302                   {
2303                     weightidx = output_weight (&weightpool, collate, curp);
2304                     obstack_int32_grow (&indirectpool, weightidx);
2305
2306                     curp = curp->mblast;
2307                   }
2308                 while (curp != series_startp);
2309
2310                 /* Add the final weight.  */
2311                 weightidx = output_weight (&weightpool, collate, curp);
2312                 obstack_int32_grow (&indirectpool, weightidx);
2313
2314                 /* And add the end byte sequence.  Without length this
2315                    time.  */
2316                 for (i = 1; i < curp->nmbs; ++i)
2317                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2318               }
2319             else
2320               {
2321                 /* A single entry.  Simply add the index and the length and
2322                    string (except for the first character which is already
2323                    tested for).  */
2324                 int i;
2325
2326                 /* Output the weight info.  */
2327                 weightidx = output_weight (&weightpool, collate, runp);
2328
2329                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2330                           + __alignof__ (int32_t) - 1)
2331                          & ~(__alignof__ (int32_t) - 1));
2332                 assert ((obstack_object_size (&extrapool)
2333                          & (__alignof__ (int32_t) - 1)) == 0);
2334                 obstack_make_room (&extrapool, added);
2335
2336                 obstack_int32_grow_fast (&extrapool, weightidx);
2337                 assert (runp->nmbs <= 256);
2338                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2339
2340                 for (i = 1; i < runp->nmbs; ++i)
2341                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2342               }
2343
2344             /* Add alignment bytes if necessary.  */
2345             while ((obstack_object_size (&extrapool)
2346                     & (__alignof__ (int32_t) - 1)) != 0)
2347               obstack_1grow_fast (&extrapool, '\0');
2348
2349             /* Next entry.  */
2350             lastp = runp;
2351             runp = runp->mbnext;
2352           }
2353         while (runp != NULL);
2354
2355         assert ((obstack_object_size (&extrapool)
2356                  & (__alignof__ (int32_t) - 1)) == 0);
2357
2358         /* If the final entry in the list is not a single character we
2359            add an UNDEFINED entry here.  */
2360         if (lastp->nmbs != 1)
2361           {
2362             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2363                          & ~(__alignof__ (int32_t) - 1));
2364             obstack_make_room (&extrapool, added);
2365
2366             obstack_int32_grow_fast (&extrapool, 0);
2367             /* XXX What rule? We just pick the first.  */
2368             obstack_1grow_fast (&extrapool, 0);
2369             /* Length is zero.  */
2370             obstack_1grow_fast (&extrapool, 0);
2371
2372             /* Add alignment bytes if necessary.  */
2373             while ((obstack_object_size (&extrapool)
2374                     & (__alignof__ (int32_t) - 1)) != 0)
2375               obstack_1grow_fast (&extrapool, '\0');
2376           }
2377       }
2378
2379   /* Add padding to the tables if necessary.  */
2380   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2381          != 0)
2382     obstack_1grow (&weightpool, 0);
2383
2384   /* Now add the four tables.  */
2385   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2386   iov[2 + cnt].iov_base = tablemb;
2387   iov[2 + cnt].iov_len = sizeof (tablemb);
2388   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2389   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2390   ++cnt;
2391
2392   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2393   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2394   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2395   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2396   ++cnt;
2397
2398   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2399   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2400   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2401   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2402   ++cnt;
2403
2404   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2405   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2406   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2407   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2408   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2409   ++cnt;
2410
2411
2412   /* Now the same for the wide character table.  We need to store some
2413      more information here.  */
2414   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2415   iov[2 + cnt].iov_base = NULL;
2416   iov[2 + cnt].iov_len = 0;
2417   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2418   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2419   ++cnt;
2420
2421   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2422   iov[2 + cnt].iov_base = NULL;
2423   iov[2 + cnt].iov_len = 0;
2424   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2425   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2426   ++cnt;
2427
2428   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2429   iov[2 + cnt].iov_base = NULL;
2430   iov[2 + cnt].iov_len = 0;
2431   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2432   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2433   ++cnt;
2434
2435   /* Since we are using the sign of an integer to mark indirection the
2436      offsets in the arrays we are indirectly referring to must not be
2437      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2438   obstack_int32_grow (&extrapool, 0);
2439   obstack_int32_grow (&indirectpool, 0);
2440
2441   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2442      will probably be used more than once it is good to store the
2443      weights only once.  */
2444   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2445     abort ();
2446
2447   /* Generate the table.  Walk through the lists of sequences starting
2448      with the same wide character and add them one after the other to
2449      the table.  In case we have more than one sequence starting with
2450      the same byte we have to use extra indirection.  */
2451   tablewc.p = 6;
2452   tablewc.q = 10;
2453   collidx_table_init (&tablewc);
2454
2455   atwc.weightpool = &weightpool;
2456   atwc.extrapool = &extrapool;
2457   atwc.indpool = &indirectpool;
2458   atwc.collate = collate;
2459   atwc.tablewc = &tablewc;
2460
2461   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2462
2463   memset (&atwc, 0, sizeof (atwc));
2464
2465   collidx_table_finalize (&tablewc);
2466
2467   /* Now add the four tables.  */
2468   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2469   iov[2 + cnt].iov_base = tablewc.result;
2470   iov[2 + cnt].iov_len = tablewc.result_size;
2471   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2472   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2473   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2474   ++cnt;
2475
2476   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2477   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2478   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2479   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2480   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2481   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2482   ++cnt;
2483
2484   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2485   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2486   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2487   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2488   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2489   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2490   ++cnt;
2491
2492   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2493   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2494   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2495   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2496   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2497   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2498   ++cnt;
2499
2500
2501   /* Finally write the table with collation element names out.  It is
2502      a hash table with a simple function which gets the name of the
2503      character as the input.  One character might have many names.  The
2504      value associated with the name is an index into the weight table
2505      where we are then interested in the first-level weight value.
2506
2507      To determine how large the table should be we count the
2508      elements we have to put in.  Since we are using internal chaining
2509      using a secondary hash function we have to make the table a bit
2510      larger to avoid extremely long search times.  We can achieve
2511      good results with a 40% larger table than there are entries.  */
2512   elem_size = 0;
2513   runp = collate->start;
2514   while (runp != NULL)
2515     {
2516       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2517         /* Yep, the element really counts.  */
2518         ++elem_size;
2519
2520       runp = runp->next;
2521     }
2522   /* Add 40% and find the next prime number.  */
2523   elem_size = next_prime (elem_size * 1.4);
2524
2525   /* Allocate the table.  Each entry consists of two words: the hash
2526      value and an index in a secondary table which provides the index
2527      into the weight table and the string itself (so that a match can
2528      be determined).  */
2529   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2530                                            elem_size * 2 * sizeof (uint32_t));
2531   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2532
2533   /* Now add the elements.  */
2534   runp = collate->start;
2535   while (runp != NULL)
2536     {
2537       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2538         {
2539           /* Compute the hash value of the name.  */
2540           uint32_t namelen = strlen (runp->name);
2541           uint32_t hash = elem_hash (runp->name, namelen);
2542           size_t idx = hash % elem_size;
2543 #ifndef NDEBUG
2544           size_t start_idx = idx;
2545 #endif
2546
2547           if (elem_table[idx * 2] != 0)
2548             {
2549               /* The spot is already taken.  Try iterating using the value
2550                  from the secondary hashing function.  */
2551               size_t iter = hash % (elem_size - 2) + 1;
2552
2553               do
2554                 {
2555                   idx += iter;
2556                   if (idx >= elem_size)
2557                     idx -= elem_size;
2558                   assert (idx != start_idx);
2559                 }
2560               while (elem_table[idx * 2] != 0);
2561             }
2562           /* This is the spot where we will insert the value.  */
2563           elem_table[idx * 2] = hash;
2564           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2565
2566           /* Then the string itself including length.  */
2567           obstack_1grow (&extrapool, namelen);
2568           obstack_grow (&extrapool, runp->name, namelen);
2569
2570           /* And the multibyte representation.  */
2571           obstack_1grow (&extrapool, runp->nmbs);
2572           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2573
2574           /* And align again to 32 bits.  */
2575           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2576             obstack_grow (&extrapool, "\0\0",
2577                           (sizeof (int32_t)
2578                            - ((1 + namelen + 1 + runp->nmbs)
2579                               % sizeof (int32_t))));
2580
2581           /* Now some 32-bit values: multibyte collation sequence,
2582              wide char string (including length), and wide char
2583              collation sequence.  */
2584           obstack_int32_grow (&extrapool, runp->mbseqorder);
2585
2586           obstack_int32_grow (&extrapool, runp->nwcs);
2587           obstack_grow (&extrapool, runp->wcs,
2588                         runp->nwcs * sizeof (uint32_t));
2589
2590           obstack_int32_grow (&extrapool, runp->wcseqorder);
2591         }
2592
2593       runp = runp->next;
2594     }
2595
2596   /* Prepare to write out this data.  */
2597   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2598   iov[2 + cnt].iov_base = &elem_size;
2599   iov[2 + cnt].iov_len = sizeof (int32_t);
2600   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2601   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2602   ++cnt;
2603
2604   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2605   iov[2 + cnt].iov_base = elem_table;
2606   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2607   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2608   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2609   ++cnt;
2610
2611   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2612   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2613   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2614   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2615   ++cnt;
2616
2617   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2618   iov[2 + cnt].iov_base = collate->mbseqorder;
2619   iov[2 + cnt].iov_len = 256;
2620   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2621   ++cnt;
2622
2623   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2624   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2625   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2626   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2627   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2628   ++cnt;
2629
2630   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2631   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2632   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2633   ++cnt;
2634
2635   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2636
2637   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2638
2639   obstack_free (&weightpool, NULL);
2640   obstack_free (&extrapool, NULL);
2641   obstack_free (&indirectpool, NULL);
2642 }
2643
2644
2645 static enum token_t
2646 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2647          const struct charmap_t *charmap, int to_endif)
2648 {
2649   while (1)
2650     {
2651       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2652       enum token_t nowtok = now->tok;
2653
2654       if (nowtok == tok_eof || nowtok == tok_end)
2655         return nowtok;
2656
2657       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2658         {
2659           lr_error (ldfile, _("%s: nested conditionals not supported"),
2660                     "LC_COLLATE");
2661           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2662           if (nowtok == tok_eof || nowtok == tok_end)
2663             return nowtok;
2664         }
2665       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2666         {
2667           lr_ignore_rest (ldfile, 1);
2668           return nowtok;
2669         }
2670       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2671         {
2672           /* Do not read the rest of the line.  */
2673           return nowtok;
2674         }
2675       else if (nowtok == tok_else)
2676         {
2677           lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
2678         }
2679
2680       lr_ignore_rest (ldfile, 0);
2681     }
2682 }
2683
2684
2685 void
2686 collate_read (struct linereader *ldfile, struct localedef_t *result,
2687               const struct charmap_t *charmap, const char *repertoire_name,
2688               int ignore_content)
2689 {
2690   struct repertoire_t *repertoire = NULL;
2691   struct locale_collate_t *collate;
2692   struct token *now;
2693   struct token *arg = NULL;
2694   enum token_t nowtok;
2695   enum token_t was_ellipsis = tok_none;
2696   struct localedef_t *copy_locale = NULL;
2697   /* Parsing state:
2698      0 - start
2699      1 - between `order-start' and `order-end'
2700      2 - after `order-end'
2701      3 - after `reorder-after', waiting for `reorder-end'
2702      4 - after `reorder-end'
2703      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2704      6 - after `reorder-sections-end'
2705   */
2706   int state = 0;
2707
2708   /* Get the repertoire we have to use.  */
2709   if (repertoire_name != NULL)
2710     repertoire = repertoire_read (repertoire_name);
2711
2712   /* The rest of the line containing `LC_COLLATE' must be free.  */
2713   lr_ignore_rest (ldfile, 1);
2714
2715   while (1)
2716     {
2717       do
2718         {
2719           now = lr_token (ldfile, charmap, result, NULL, verbose);
2720           nowtok = now->tok;
2721         }
2722       while (nowtok == tok_eol);
2723
2724       if (nowtok != tok_define)
2725         break;
2726
2727       if (ignore_content)
2728         lr_ignore_rest (ldfile, 0);
2729       else
2730         {
2731           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2732           if (arg->tok != tok_ident)
2733             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2734           else
2735             {
2736               /* Simply add the new symbol.  */
2737               struct name_list *newsym = xmalloc (sizeof (*newsym)
2738                                                   + arg->val.str.lenmb + 1);
2739               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2740               newsym->str[arg->val.str.lenmb] = '\0';
2741               newsym->next = defined;
2742               defined = newsym;
2743
2744               lr_ignore_rest (ldfile, 1);
2745             }
2746         }
2747     }
2748
2749   if (nowtok == tok_copy)
2750     {
2751       now = lr_token (ldfile, charmap, result, NULL, verbose);
2752       if (now->tok != tok_string)
2753         {
2754           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2755
2756         skip_category:
2757           do
2758             now = lr_token (ldfile, charmap, result, NULL, verbose);
2759           while (now->tok != tok_eof && now->tok != tok_end);
2760
2761           if (now->tok != tok_eof
2762               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2763                   now->tok == tok_eof))
2764             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2765           else if (now->tok != tok_lc_collate)
2766             {
2767               lr_error (ldfile, _("\
2768 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2769               lr_ignore_rest (ldfile, 0);
2770             }
2771           else
2772             lr_ignore_rest (ldfile, 1);
2773
2774           return;
2775         }
2776
2777       if (! ignore_content)
2778         {
2779           /* Get the locale definition.  */
2780           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2781                                      repertoire_name, charmap, NULL);
2782           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2783             {
2784               /* Not yet loaded.  So do it now.  */
2785               if (locfile_read (copy_locale, charmap) != 0)
2786                 goto skip_category;
2787             }
2788
2789           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2790             return;
2791         }
2792
2793       lr_ignore_rest (ldfile, 1);
2794
2795       now = lr_token (ldfile, charmap, result, NULL, verbose);
2796       nowtok = now->tok;
2797     }
2798
2799   /* Prepare the data structures.  */
2800   collate_startup (ldfile, result, copy_locale, ignore_content);
2801   collate = result->categories[LC_COLLATE].collate;
2802
2803   while (1)
2804     {
2805       char ucs4buf[10];
2806       char *symstr;
2807       size_t symlen;
2808
2809       /* Of course we don't proceed beyond the end of file.  */
2810       if (nowtok == tok_eof)
2811         break;
2812
2813       /* Ignore empty lines.  */
2814       if (nowtok == tok_eol)
2815         {
2816           now = lr_token (ldfile, charmap, result, NULL, verbose);
2817           nowtok = now->tok;
2818           continue;
2819         }
2820
2821       switch (nowtok)
2822         {
2823         case tok_copy:
2824           /* Allow copying other locales.  */
2825           now = lr_token (ldfile, charmap, result, NULL, verbose);
2826           if (now->tok != tok_string)
2827             goto err_label;
2828
2829           if (! ignore_content)
2830             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2831                          charmap, result);
2832
2833           lr_ignore_rest (ldfile, 1);
2834           break;
2835
2836         case tok_coll_weight_max:
2837           /* Ignore the rest of the line if we don't need the input of
2838              this line.  */
2839           if (ignore_content)
2840             {
2841               lr_ignore_rest (ldfile, 0);
2842               break;
2843             }
2844
2845           if (state != 0)
2846             goto err_label;
2847
2848           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2849           if (arg->tok != tok_number)
2850             goto err_label;
2851           if (collate->col_weight_max != -1)
2852             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2853                       "LC_COLLATE", "col_weight_max");
2854           else
2855             collate->col_weight_max = arg->val.num;
2856           lr_ignore_rest (ldfile, 1);
2857           break;
2858
2859         case tok_section_symbol:
2860           /* Ignore the rest of the line if we don't need the input of
2861              this line.  */
2862           if (ignore_content)
2863             {
2864               lr_ignore_rest (ldfile, 0);
2865               break;
2866             }
2867
2868           if (state != 0)
2869             goto err_label;
2870
2871           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2872           if (arg->tok != tok_bsymbol)
2873             goto err_label;
2874           else if (!ignore_content)
2875             {
2876               /* Check whether this section is already known.  */
2877               struct section_list *known = collate->sections;
2878               while (known != NULL)
2879                 {
2880                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2881                     break;
2882                   known = known->next;
2883                 }
2884
2885               if (known != NULL)
2886                 {
2887                   lr_error (ldfile,
2888                             _("%s: duplicate declaration of section `%s'"),
2889                             "LC_COLLATE", arg->val.str.startmb);
2890                   free (arg->val.str.startmb);
2891                 }
2892               else
2893                 collate->sections = make_seclist_elem (collate,
2894                                                        arg->val.str.startmb,
2895                                                        collate->sections);
2896
2897               lr_ignore_rest (ldfile, known == NULL);
2898             }
2899           else
2900             {
2901               free (arg->val.str.startmb);
2902               lr_ignore_rest (ldfile, 0);
2903             }
2904           break;
2905
2906         case tok_collating_element:
2907           /* Ignore the rest of the line if we don't need the input of
2908              this line.  */
2909           if (ignore_content)
2910             {
2911               lr_ignore_rest (ldfile, 0);
2912               break;
2913             }
2914
2915           if (state != 0 && state != 2)
2916             goto err_label;
2917
2918           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2919           if (arg->tok != tok_bsymbol)
2920             goto err_label;
2921           else
2922             {
2923               const char *symbol = arg->val.str.startmb;
2924               size_t symbol_len = arg->val.str.lenmb;
2925
2926               /* Next the `from' keyword.  */
2927               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2928               if (arg->tok != tok_from)
2929                 {
2930                   free ((char *) symbol);
2931                   goto err_label;
2932                 }
2933
2934               ldfile->return_widestr = 1;
2935               ldfile->translate_strings = 1;
2936
2937               /* Finally the string with the replacement.  */
2938               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2939
2940               ldfile->return_widestr = 0;
2941               ldfile->translate_strings = 0;
2942
2943               if (arg->tok != tok_string)
2944                 goto err_label;
2945
2946               if (!ignore_content && symbol != NULL)
2947                 {
2948                   /* The name is already defined.  */
2949                   if (check_duplicate (ldfile, collate, charmap,
2950                                        repertoire, symbol, symbol_len))
2951                     goto col_elem_free;
2952
2953                   if (arg->val.str.startmb != NULL)
2954                     insert_entry (&collate->elem_table, symbol, symbol_len,
2955                                   new_element (collate,
2956                                                arg->val.str.startmb,
2957                                                arg->val.str.lenmb - 1,
2958                                                arg->val.str.startwc,
2959                                                symbol, symbol_len, 0));
2960                 }
2961               else
2962                 {
2963                 col_elem_free:
2964                   if (symbol != NULL)
2965                     free ((char *) symbol);
2966                   free (arg->val.str.startmb);
2967                   free (arg->val.str.startwc);
2968                 }
2969               lr_ignore_rest (ldfile, 1);
2970             }
2971           break;
2972
2973         case tok_collating_symbol:
2974           /* Ignore the rest of the line if we don't need the input of
2975              this line.  */
2976           if (ignore_content)
2977             {
2978               lr_ignore_rest (ldfile, 0);
2979               break;
2980             }
2981
2982           if (state != 0 && state != 2)
2983             goto err_label;
2984
2985           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2986           if (arg->tok != tok_bsymbol)
2987             goto err_label;
2988           else
2989             {
2990               char *symbol = arg->val.str.startmb;
2991               size_t symbol_len = arg->val.str.lenmb;
2992               char *endsymbol = NULL;
2993               size_t endsymbol_len = 0;
2994               enum token_t ellipsis = tok_none;
2995
2996               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2997               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2998                 {
2999                   ellipsis = arg->tok;
3000
3001                   arg = lr_token (ldfile, charmap, result, repertoire,
3002                                   verbose);
3003                   if (arg->tok != tok_bsymbol)
3004                     {
3005                       free (symbol);
3006                       goto err_label;
3007                     }
3008
3009                   endsymbol = arg->val.str.startmb;
3010                   endsymbol_len = arg->val.str.lenmb;
3011
3012                   lr_ignore_rest (ldfile, 1);
3013                 }
3014               else if (arg->tok != tok_eol)
3015                 {
3016                   free (symbol);
3017                   goto err_label;
3018                 }
3019
3020               if (!ignore_content)
3021                 {
3022                   if (symbol == NULL
3023                       || (ellipsis != tok_none && endsymbol == NULL))
3024                     {
3025                       lr_error (ldfile, _("\
3026 %s: unknown character in collating symbol name"),
3027                                 "LC_COLLATE");
3028                       goto col_sym_free;
3029                     }
3030                   else if (ellipsis == tok_none)
3031                     {
3032                       /* A single symbol, no ellipsis.  */
3033                       if (check_duplicate (ldfile, collate, charmap,
3034                                            repertoire, symbol, symbol_len))
3035                         /* The name is already defined.  */
3036                         goto col_sym_free;
3037
3038                       insert_entry (&collate->sym_table, symbol, symbol_len,
3039                                     new_symbol (collate, symbol, symbol_len));
3040                     }
3041                   else if (symbol_len != endsymbol_len)
3042                     {
3043                     col_sym_inv_range:
3044                       lr_error (ldfile,
3045                                 _("invalid names for character range"));
3046                       goto col_sym_free;
3047                     }
3048                   else
3049                     {
3050                       /* Oh my, we have to handle an ellipsis.  First, as
3051                          usual, determine the common prefix and then
3052                          convert the rest into a range.  */
3053                       size_t prefixlen;
3054                       unsigned long int from;
3055                       unsigned long int to;
3056                       char *endp;
3057
3058                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
3059                         if (symbol[prefixlen] != endsymbol[prefixlen])
3060                           break;
3061
3062                       /* Convert the rest into numbers.  */
3063                       symbol[symbol_len] = '\0';
3064                       from = strtoul (&symbol[prefixlen], &endp,
3065                                       ellipsis == tok_ellipsis2 ? 16 : 10);
3066                       if (*endp != '\0')
3067                         goto col_sym_inv_range;
3068
3069                       endsymbol[symbol_len] = '\0';
3070                       to = strtoul (&endsymbol[prefixlen], &endp,
3071                                     ellipsis == tok_ellipsis2 ? 16 : 10);
3072                       if (*endp != '\0')
3073                         goto col_sym_inv_range;
3074
3075                       if (from > to)
3076                         goto col_sym_inv_range;
3077
3078                       /* Now loop over all entries.  */
3079                       while (from <= to)
3080                         {
3081                           char *symbuf;
3082
3083                           symbuf = (char *) obstack_alloc (&collate->mempool,
3084                                                            symbol_len + 1);
3085
3086                           /* Create the name.  */
3087                           sprintf (symbuf,
3088                                    ellipsis == tok_ellipsis2
3089                                    ? "%.*s%.*lX" : "%.*s%.*lu",
3090                                    (int) prefixlen, symbol,
3091                                    (int) (symbol_len - prefixlen), from);
3092
3093                           if (check_duplicate (ldfile, collate, charmap,
3094                                                repertoire, symbuf, symbol_len))
3095                             /* The name is already defined.  */
3096                             goto col_sym_free;
3097
3098                           insert_entry (&collate->sym_table, symbuf,
3099                                         symbol_len,
3100                                         new_symbol (collate, symbuf,
3101                                                     symbol_len));
3102
3103                           /* Increment the counter.  */
3104                           ++from;
3105                         }
3106
3107                       goto col_sym_free;
3108                     }
3109                 }
3110               else
3111                 {
3112                 col_sym_free:
3113                   free (symbol);
3114                   free (endsymbol);
3115                 }
3116             }
3117           break;
3118
3119         case tok_symbol_equivalence:
3120           /* Ignore the rest of the line if we don't need the input of
3121              this line.  */
3122           if (ignore_content)
3123             {
3124               lr_ignore_rest (ldfile, 0);
3125               break;
3126             }
3127
3128           if (state != 0)
3129             goto err_label;
3130
3131           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3132           if (arg->tok != tok_bsymbol)
3133             goto err_label;
3134           else
3135             {
3136               const char *newname = arg->val.str.startmb;
3137               size_t newname_len = arg->val.str.lenmb;
3138               const char *symname;
3139               size_t symname_len;
3140               void *symval;     /* Actually struct symbol_t*  */
3141
3142               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3143               if (arg->tok != tok_bsymbol)
3144                 {
3145                   if (newname != NULL)
3146                     free ((char *) newname);
3147                   goto err_label;
3148                 }
3149
3150               symname = arg->val.str.startmb;
3151               symname_len = arg->val.str.lenmb;
3152
3153               if (newname == NULL)
3154                 {
3155                   lr_error (ldfile, _("\
3156 %s: unknown character in equivalent definition name"),
3157                             "LC_COLLATE");
3158
3159                 sym_equiv_free:
3160                   if (newname != NULL)
3161                     free ((char *) newname);
3162                   if (symname != NULL)
3163                     free ((char *) symname);
3164                   break;
3165                 }
3166               if (symname == NULL)
3167                 {
3168                   lr_error (ldfile, _("\
3169 %s: unknown character in equivalent definition value"),
3170                             "LC_COLLATE");
3171                   goto sym_equiv_free;
3172                 }
3173
3174               /* See whether the symbol name is already defined.  */
3175               if (find_entry (&collate->sym_table, symname, symname_len,
3176                               &symval) != 0)
3177                 {
3178                   lr_error (ldfile, _("\
3179 %s: unknown symbol `%s' in equivalent definition"),
3180                             "LC_COLLATE", symname);
3181                   goto sym_equiv_free;
3182                 }
3183
3184               if (insert_entry (&collate->sym_table,
3185                                 newname, newname_len, symval) < 0)
3186                 {
3187                   lr_error (ldfile, _("\
3188 error while adding equivalent collating symbol"));
3189                   goto sym_equiv_free;
3190                 }
3191
3192               free ((char *) symname);
3193             }
3194           lr_ignore_rest (ldfile, 1);
3195           break;
3196
3197         case tok_script:
3198           /* We get told about the scripts we know.  */
3199           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3200           if (arg->tok != tok_bsymbol)
3201             goto err_label;
3202           else
3203             {
3204               struct section_list *runp = collate->known_sections;
3205               char *name;
3206
3207               while (runp != NULL)
3208                 if (strncmp (runp->name, arg->val.str.startmb,
3209                              arg->val.str.lenmb) == 0
3210                     && runp->name[arg->val.str.lenmb] == '\0')
3211                   break;
3212                 else
3213                   runp = runp->def_next;
3214
3215               if (runp != NULL)
3216                 {
3217                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3218                             runp->name);
3219                   lr_ignore_rest (ldfile, 0);
3220                   break;
3221                 }
3222
3223               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3224               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3225               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3226               name[arg->val.str.lenmb] = '\0';
3227               runp->name = name;
3228
3229               runp->def_next = collate->known_sections;
3230               collate->known_sections = runp;
3231             }
3232           lr_ignore_rest (ldfile, 1);
3233           break;
3234
3235         case tok_order_start:
3236           /* Ignore the rest of the line if we don't need the input of
3237              this line.  */
3238           if (ignore_content)
3239             {
3240               lr_ignore_rest (ldfile, 0);
3241               break;
3242             }
3243
3244           if (state != 0 && state != 1 && state != 2)
3245             goto err_label;
3246           state = 1;
3247
3248           /* The 14652 draft does not specify whether all `order_start' lines
3249              must contain the same number of sort-rules, but 14651 does.  So
3250              we require this here as well.  */
3251           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3252           if (arg->tok == tok_bsymbol)
3253             {
3254               /* This better should be a section name.  */
3255               struct section_list *sp = collate->known_sections;
3256               while (sp != NULL
3257                      && (sp->name == NULL
3258                          || strncmp (sp->name, arg->val.str.startmb,
3259                                      arg->val.str.lenmb) != 0
3260                          || sp->name[arg->val.str.lenmb] != '\0'))
3261                 sp = sp->def_next;
3262
3263               if (sp == NULL)
3264                 {
3265                   lr_error (ldfile, _("\
3266 %s: unknown section name `%.*s'"),
3267                             "LC_COLLATE", (int) arg->val.str.lenmb,
3268                             arg->val.str.startmb);
3269                   /* We use the error section.  */
3270                   collate->current_section = &collate->error_section;
3271
3272                   if (collate->error_section.first == NULL)
3273                     {
3274                       /* Insert &collate->error_section at the end of
3275                          the collate->sections list.  */
3276                       if (collate->sections == NULL)
3277                         collate->sections = &collate->error_section;
3278                       else
3279                         {
3280                           sp = collate->sections;
3281                           while (sp->next != NULL)
3282                             sp = sp->next;
3283
3284                           sp->next = &collate->error_section;
3285                         }
3286                       collate->error_section.next = NULL;
3287                     }
3288                 }
3289               else
3290                 {
3291                   /* One should not be allowed to open the same
3292                      section twice.  */
3293                   if (sp->first != NULL)
3294                     lr_error (ldfile, _("\
3295 %s: multiple order definitions for section `%s'"),
3296                               "LC_COLLATE", sp->name);
3297                   else
3298                     {
3299                       /* Insert sp in the collate->sections list,
3300                          right after collate->current_section.  */
3301                       if (collate->current_section != NULL)
3302                         {
3303                           sp->next = collate->current_section->next;
3304                           collate->current_section->next = sp;
3305                         }
3306                       else if (collate->sections == NULL)
3307                         /* This is the first section to be defined.  */
3308                         collate->sections = sp;
3309
3310                       collate->current_section = sp;
3311                     }
3312
3313                   /* Next should come the end of the line or a semicolon.  */
3314                   arg = lr_token (ldfile, charmap, result, repertoire,
3315                                   verbose);
3316                   if (arg->tok == tok_eol)
3317                     {
3318                       uint32_t cnt;
3319
3320                       /* This means we have exactly one rule: `forward'.  */
3321                       if (nrules > 1)
3322                         lr_error (ldfile, _("\
3323 %s: invalid number of sorting rules"),
3324                                   "LC_COLLATE");
3325                       else
3326                         nrules = 1;
3327                       sp->rules = obstack_alloc (&collate->mempool,
3328                                                  (sizeof (enum coll_sort_rule)
3329                                                   * nrules));
3330                       for (cnt = 0; cnt < nrules; ++cnt)
3331                         sp->rules[cnt] = sort_forward;
3332
3333                       /* Next line.  */
3334                       break;
3335                     }
3336
3337                   /* Get the next token.  */
3338                   arg = lr_token (ldfile, charmap, result, repertoire,
3339                                   verbose);
3340                 }
3341             }
3342           else
3343             {
3344               /* There is no section symbol.  Therefore we use the unnamed
3345                  section.  */
3346               collate->current_section = &collate->unnamed_section;
3347
3348               if (collate->unnamed_section.first != NULL)
3349                 lr_error (ldfile, _("\
3350 %s: multiple order definitions for unnamed section"),
3351                           "LC_COLLATE");
3352               else
3353                 {
3354                   /* Insert &collate->unnamed_section at the beginning of
3355                      the collate->sections list.  */
3356                   collate->unnamed_section.next = collate->sections;
3357                   collate->sections = &collate->unnamed_section;
3358                 }
3359             }
3360
3361           /* Now read the direction names.  */
3362           read_directions (ldfile, arg, charmap, repertoire, result);
3363
3364           /* From now we need the strings untranslated.  */
3365           ldfile->translate_strings = 0;
3366           break;
3367
3368         case tok_order_end:
3369           /* Ignore the rest of the line if we don't need the input of
3370              this line.  */
3371           if (ignore_content)
3372             {
3373               lr_ignore_rest (ldfile, 0);
3374               break;
3375             }
3376
3377           if (state != 1)
3378             goto err_label;
3379
3380           /* Handle ellipsis at end of list.  */
3381           if (was_ellipsis != tok_none)
3382             {
3383               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3384                                repertoire, result);
3385               was_ellipsis = tok_none;
3386             }
3387
3388           state = 2;
3389           lr_ignore_rest (ldfile, 1);
3390           break;
3391
3392         case tok_reorder_after:
3393           /* Ignore the rest of the line if we don't need the input of
3394              this line.  */
3395           if (ignore_content)
3396             {
3397               lr_ignore_rest (ldfile, 0);
3398               break;
3399             }
3400
3401           if (state == 1)
3402             {
3403               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3404                         "LC_COLLATE");
3405               state = 2;
3406
3407               /* Handle ellipsis at end of list.  */
3408               if (was_ellipsis != tok_none)
3409                 {
3410                   handle_ellipsis (ldfile, arg->val.str.startmb,
3411                                    arg->val.str.lenmb, was_ellipsis, charmap,
3412                                    repertoire, result);
3413                   was_ellipsis = tok_none;
3414                 }
3415             }
3416           else if (state == 0 && copy_locale == NULL)
3417             goto err_label;
3418           else if (state != 0 && state != 2 && state != 3)
3419             goto err_label;
3420           state = 3;
3421
3422           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3423           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3424             {
3425               /* Find this symbol in the sequence table.  */
3426               char ucsbuf[10];
3427               char *startmb;
3428               size_t lenmb;
3429               struct element_t *insp;
3430               int no_error = 1;
3431               void *ptr;
3432
3433               if (arg->tok == tok_bsymbol)
3434                 {
3435                   startmb = arg->val.str.startmb;
3436                   lenmb = arg->val.str.lenmb;
3437                 }
3438               else
3439                 {
3440                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3441                   startmb = ucsbuf;
3442                   lenmb = 9;
3443                 }
3444
3445               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3446                 /* Yes, the symbol exists.  Simply point the cursor
3447                    to it.  */
3448                 collate->cursor = (struct element_t *) ptr;
3449               else
3450                 {
3451                   struct symbol_t *symbp;
3452                   void *ptr;
3453
3454                   if (find_entry (&collate->sym_table, startmb, lenmb,
3455                                   &ptr) == 0)
3456                     {
3457                       symbp = ptr;
3458
3459                       if (symbp->order->last != NULL
3460                           || symbp->order->next != NULL)
3461                         collate->cursor = symbp->order;
3462                       else
3463                         {
3464                           /* This is a collating symbol but its position
3465                              is not yet defined.  */
3466                           lr_error (ldfile, _("\
3467 %s: order for collating symbol %.*s not yet defined"),
3468                                     "LC_COLLATE", (int) lenmb, startmb);
3469                           collate->cursor = NULL;
3470                           no_error = 0;
3471                         }
3472                     }
3473                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3474                                        &ptr) == 0)
3475                     {
3476                       insp = (struct element_t *) ptr;
3477
3478                       if (insp->last != NULL || insp->next != NULL)
3479                         collate->cursor = insp;
3480                       else
3481                         {
3482                           /* This is a collating element but its position
3483                              is not yet defined.  */
3484                           lr_error (ldfile, _("\
3485 %s: order for collating element %.*s not yet defined"),
3486                                     "LC_COLLATE", (int) lenmb, startmb);
3487                           collate->cursor = NULL;
3488                           no_error = 0;
3489                         }
3490                     }
3491                   else
3492                     {
3493                       /* This is bad.  The symbol after which we have to
3494                          insert does not exist.  */
3495                       lr_error (ldfile, _("\
3496 %s: cannot reorder after %.*s: symbol not known"),
3497                                 "LC_COLLATE", (int) lenmb, startmb);
3498                       collate->cursor = NULL;
3499                       no_error = 0;
3500                     }
3501                 }
3502
3503               lr_ignore_rest (ldfile, no_error);
3504             }
3505           else
3506             /* This must not happen.  */
3507             goto err_label;
3508           break;
3509
3510         case tok_reorder_end:
3511           /* Ignore the rest of the line if we don't need the input of
3512              this line.  */
3513           if (ignore_content)
3514             break;
3515
3516           if (state != 3)
3517             goto err_label;
3518           state = 4;
3519           lr_ignore_rest (ldfile, 1);
3520           break;
3521
3522         case tok_reorder_sections_after:
3523           /* Ignore the rest of the line if we don't need the input of
3524              this line.  */
3525           if (ignore_content)
3526             {
3527               lr_ignore_rest (ldfile, 0);
3528               break;
3529             }
3530
3531           if (state == 1)
3532             {
3533               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3534                         "LC_COLLATE");
3535               state = 2;
3536
3537               /* Handle ellipsis at end of list.  */
3538               if (was_ellipsis != tok_none)
3539                 {
3540                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3541                                    repertoire, result);
3542                   was_ellipsis = tok_none;
3543                 }
3544             }
3545           else if (state == 3)
3546             {
3547               WITH_CUR_LOCALE (error (0, 0, _("\
3548 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3549               state = 4;
3550             }
3551           else if (state != 2 && state != 4)
3552             goto err_label;
3553           state = 5;
3554
3555           /* Get the name of the sections we are adding after.  */
3556           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3557           if (arg->tok == tok_bsymbol)
3558             {
3559               /* Now find a section with this name.  */
3560               struct section_list *runp = collate->sections;
3561
3562               while (runp != NULL)
3563                 {
3564                   if (runp->name != NULL
3565                       && strlen (runp->name) == arg->val.str.lenmb
3566                       && memcmp (runp->name, arg->val.str.startmb,
3567                                  arg->val.str.lenmb) == 0)
3568                     break;
3569
3570                   runp = runp->next;
3571                 }
3572
3573               if (runp != NULL)
3574                 collate->current_section = runp;
3575               else
3576                 {
3577                   /* This is bad.  The section after which we have to
3578                      reorder does not exist.  Therefore we cannot
3579                      process the whole rest of this reorder
3580                      specification.  */
3581                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3582                             "LC_COLLATE", (int) arg->val.str.lenmb,
3583                             arg->val.str.startmb);
3584
3585                   do
3586                     {
3587                       lr_ignore_rest (ldfile, 0);
3588
3589                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3590                     }
3591                   while (now->tok == tok_reorder_sections_after
3592                          || now->tok == tok_reorder_sections_end
3593                          || now->tok == tok_end);
3594
3595                   /* Process the token we just saw.  */
3596                   nowtok = now->tok;
3597                   continue;
3598                 }
3599             }
3600           else
3601             /* This must not happen.  */
3602             goto err_label;
3603           break;
3604
3605         case tok_reorder_sections_end:
3606           /* Ignore the rest of the line if we don't need the input of
3607              this line.  */
3608           if (ignore_content)
3609             break;
3610
3611           if (state != 5)
3612             goto err_label;
3613           state = 6;
3614           lr_ignore_rest (ldfile, 1);
3615           break;
3616
3617         case tok_bsymbol:
3618         case tok_ucs4:
3619           /* Ignore the rest of the line if we don't need the input of
3620              this line.  */
3621           if (ignore_content)
3622             {
3623               lr_ignore_rest (ldfile, 0);
3624               break;
3625             }
3626
3627           if (state != 0 && state != 1 && state != 3 && state != 5)
3628             goto err_label;
3629
3630           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3631             goto err_label;
3632
3633           if (nowtok == tok_ucs4)
3634             {
3635               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3636               symstr = ucs4buf;
3637               symlen = 9;
3638             }
3639           else if (arg != NULL)
3640             {
3641               symstr = arg->val.str.startmb;
3642               symlen = arg->val.str.lenmb;
3643             }
3644           else
3645             {
3646               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3647                         (int) ldfile->token.val.str.lenmb,
3648                         ldfile->token.val.str.startmb);
3649               break;
3650             }
3651
3652           struct element_t *seqp;
3653           if (state == 0)
3654             {
3655               /* We are outside an `order_start' region.  This means
3656                  we must only accept definitions of values for
3657                  collation symbols since these are purely abstract
3658                  values and don't need directions associated.  */
3659               void *ptr;
3660
3661               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3662                 {
3663                   seqp = ptr;
3664
3665                   /* It's already defined.  First check whether this
3666                      is really a collating symbol.  */
3667                   if (seqp->is_character)
3668                     goto err_label;
3669
3670                   goto move_entry;
3671                 }
3672               else
3673                 {
3674                   void *result;
3675
3676                   if (find_entry (&collate->sym_table, symstr, symlen,
3677                                   &result) != 0)
3678                     /* No collating symbol, it's an error.  */
3679                     goto err_label;
3680
3681                   /* Maybe this is the first time we define a symbol
3682                      value and it is before the first actual section.  */
3683                   if (collate->sections == NULL)
3684                     collate->sections = collate->current_section =
3685                       &collate->symbol_section;
3686                 }
3687
3688               if (was_ellipsis != tok_none)
3689                 {
3690                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3691                                    charmap, repertoire, result);
3692
3693                   /* Remember that we processed the ellipsis.  */
3694                   was_ellipsis = tok_none;
3695
3696                   /* And don't add the value a second time.  */
3697                   break;
3698                 }
3699             }
3700           else if (state == 3)
3701             {
3702               /* It is possible that we already have this collation sequence.
3703                  In this case we move the entry.  */
3704               void *sym;
3705               void *ptr;
3706
3707               /* If the symbol after which we have to insert was not found
3708                  ignore all entries.  */
3709               if (collate->cursor == NULL)
3710                 {
3711                   lr_ignore_rest (ldfile, 0);
3712                   break;
3713                 }
3714
3715               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3716                 {
3717                   seqp = (struct element_t *) ptr;
3718                   goto move_entry;
3719                 }
3720
3721               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3722                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3723                 goto move_entry;
3724
3725               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3726                   && (seqp = (struct element_t *) ptr,
3727                       seqp->last != NULL || seqp->next != NULL
3728                       || (collate->start != NULL && seqp == collate->start)))
3729                 {
3730                 move_entry:
3731                   /* Remove the entry from the old position.  */
3732                   if (seqp->last == NULL)
3733                     collate->start = seqp->next;
3734                   else
3735                     seqp->last->next = seqp->next;
3736                   if (seqp->next != NULL)
3737                     seqp->next->last = seqp->last;
3738
3739                   /* We also have to check whether this entry is the
3740                      first or last of a section.  */
3741                   if (seqp->section->first == seqp)
3742                     {
3743                       if (seqp->section->first == seqp->section->last)
3744                         /* This section has no content anymore.  */
3745                         seqp->section->first = seqp->section->last = NULL;
3746                       else
3747                         seqp->section->first = seqp->next;
3748                     }
3749                   else if (seqp->section->last == seqp)
3750                     seqp->section->last = seqp->last;
3751
3752                   /* Now insert it in the new place.  */
3753                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3754                                   tok_none);
3755                   break;
3756                 }
3757
3758               /* Otherwise we just add a new entry.  */
3759             }
3760           else if (state == 5)
3761             {
3762               /* We are reordering sections.  Find the named section.  */
3763               struct section_list *runp = collate->sections;
3764               struct section_list *prevp = NULL;
3765
3766               while (runp != NULL)
3767                 {
3768                   if (runp->name != NULL
3769                       && strlen (runp->name) == symlen
3770                       && memcmp (runp->name, symstr, symlen) == 0)
3771                     break;
3772
3773                   prevp = runp;
3774                   runp = runp->next;
3775                 }
3776
3777               if (runp == NULL)
3778                 {
3779                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3780                             "LC_COLLATE", (int) symlen, symstr);
3781                   lr_ignore_rest (ldfile, 0);
3782                 }
3783               else
3784                 {
3785                   if (runp != collate->current_section)
3786                     {
3787                       /* Remove the named section from the old place and
3788                          insert it in the new one.  */
3789                       prevp->next = runp->next;
3790
3791                       runp->next = collate->current_section->next;
3792                       collate->current_section->next = runp;
3793                       collate->current_section = runp;
3794                     }
3795
3796                   /* Process the rest of the line which might change
3797                      the collation rules.  */
3798                   arg = lr_token (ldfile, charmap, result, repertoire,
3799                                   verbose);
3800                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3801                     read_directions (ldfile, arg, charmap, repertoire,
3802                                      result);
3803                 }
3804               break;
3805             }
3806           else if (was_ellipsis != tok_none)
3807             {
3808               /* Using the information in the `ellipsis_weight'
3809                  element and this and the last value we have to handle
3810                  the ellipsis now.  */
3811               assert (state == 1);
3812
3813               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3814                                repertoire, result);
3815
3816               /* Remember that we processed the ellipsis.  */
3817               was_ellipsis = tok_none;
3818
3819               /* And don't add the value a second time.  */
3820               break;
3821             }
3822
3823           /* Now insert in the new place.  */
3824           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3825           break;
3826
3827         case tok_undefined:
3828           /* Ignore the rest of the line if we don't need the input of
3829              this line.  */
3830           if (ignore_content)
3831             {
3832               lr_ignore_rest (ldfile, 0);
3833               break;
3834             }
3835
3836           if (state != 1)
3837             goto err_label;
3838
3839           if (was_ellipsis != tok_none)
3840             {
3841               lr_error (ldfile,
3842                         _("%s: cannot have `%s' as end of ellipsis range"),
3843                         "LC_COLLATE", "UNDEFINED");
3844
3845               unlink_element (collate);
3846               was_ellipsis = tok_none;
3847             }
3848
3849           /* See whether UNDEFINED already appeared somewhere.  */
3850           if (collate->undefined.next != NULL
3851               || &collate->undefined == collate->cursor)
3852             {
3853               lr_error (ldfile,
3854                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3855                         "LC_COLLATE", 9, "UNDEFINED",
3856                         collate->undefined.file,
3857                         collate->undefined.line);
3858               lr_ignore_rest (ldfile, 0);
3859             }
3860           else
3861             /* Parse the weights.  */
3862              insert_weights (ldfile, &collate->undefined, charmap,
3863                              repertoire, result, tok_none);
3864           break;
3865
3866         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3867         case tok_ellipsis3: /* absolute ellipsis */
3868         case tok_ellipsis4: /* symbolic decimal ellipsis */
3869           /* This is the symbolic (decimal or hexadecimal) or absolute
3870              ellipsis.  */
3871           if (was_ellipsis != tok_none)
3872             goto err_label;
3873
3874           if (state != 0 && state != 1 && state != 3)
3875             goto err_label;
3876
3877           was_ellipsis = nowtok;
3878
3879           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3880                           repertoire, result, nowtok);
3881           break;
3882
3883         case tok_end:
3884         seen_end:
3885           /* Next we assume `LC_COLLATE'.  */
3886           if (!ignore_content)
3887             {
3888               if (state == 0 && copy_locale == NULL)
3889                 /* We must either see a copy statement or have
3890                    ordering values.  */
3891                 lr_error (ldfile,
3892                           _("%s: empty category description not allowed"),
3893                           "LC_COLLATE");
3894               else if (state == 1)
3895                 {
3896                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3897                             "LC_COLLATE");
3898
3899                   /* Handle ellipsis at end of list.  */
3900                   if (was_ellipsis != tok_none)
3901                     {
3902                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3903                                        repertoire, result);
3904                       was_ellipsis = tok_none;
3905                     }
3906                 }
3907               else if (state == 3)
3908                 WITH_CUR_LOCALE (error (0, 0, _("\
3909 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3910               else if (state == 5)
3911                 WITH_CUR_LOCALE (error (0, 0, _("\
3912 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3913             }
3914           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3915           if (arg->tok == tok_eof)
3916             break;
3917           if (arg->tok == tok_eol)
3918             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3919           else if (arg->tok != tok_lc_collate)
3920             lr_error (ldfile, _("\
3921 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3922           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3923           return;
3924
3925         case tok_define:
3926           if (ignore_content)
3927             {
3928               lr_ignore_rest (ldfile, 0);
3929               break;
3930             }
3931
3932           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3933           if (arg->tok != tok_ident)
3934             goto err_label;
3935
3936           /* Simply add the new symbol.  */
3937           struct name_list *newsym = xmalloc (sizeof (*newsym)
3938                                               + arg->val.str.lenmb + 1);
3939           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3940           newsym->str[arg->val.str.lenmb] = '\0';
3941           newsym->next = defined;
3942           defined = newsym;
3943
3944           lr_ignore_rest (ldfile, 1);
3945           break;
3946
3947         case tok_undef:
3948           if (ignore_content)
3949             {
3950               lr_ignore_rest (ldfile, 0);
3951               break;
3952             }
3953
3954           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3955           if (arg->tok != tok_ident)
3956             goto err_label;
3957
3958           /* Remove _all_ occurrences of the symbol from the list.  */
3959           struct name_list *prevdef = NULL;
3960           struct name_list *curdef = defined;
3961           while (curdef != NULL)
3962             if (strncmp (arg->val.str.startmb, curdef->str,
3963                          arg->val.str.lenmb) == 0
3964                 && curdef->str[arg->val.str.lenmb] == '\0')
3965               {
3966                 if (prevdef == NULL)
3967                   defined = curdef->next;
3968                 else
3969                   prevdef->next = curdef->next;
3970
3971                 struct name_list *olddef = curdef;
3972                 curdef = curdef->next;
3973
3974                 free (olddef);
3975               }
3976             else
3977               {
3978                 prevdef = curdef;
3979                 curdef = curdef->next;
3980               }
3981
3982           lr_ignore_rest (ldfile, 1);
3983           break;
3984
3985         case tok_ifdef:
3986         case tok_ifndef:
3987           if (ignore_content)
3988             {
3989               lr_ignore_rest (ldfile, 0);
3990               break;
3991             }
3992
3993         found_ifdef:
3994           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3995           if (arg->tok != tok_ident)
3996             goto err_label;
3997           lr_ignore_rest (ldfile, 1);
3998
3999           if (collate->else_action == else_none)
4000             {
4001               curdef = defined;
4002               while (curdef != NULL)
4003                 if (strncmp (arg->val.str.startmb, curdef->str,
4004                              arg->val.str.lenmb) == 0
4005                     && curdef->str[arg->val.str.lenmb] == '\0')
4006                   break;
4007                 else
4008                   curdef = curdef->next;
4009
4010               if ((nowtok == tok_ifdef && curdef != NULL)
4011                   || (nowtok == tok_ifndef && curdef == NULL))
4012                 {
4013                   /* We have to use the if-branch.  */
4014                   collate->else_action = else_ignore;
4015                 }
4016               else
4017                 {
4018                   /* We have to use the else-branch, if there is one.  */
4019                   nowtok = skip_to (ldfile, collate, charmap, 0);
4020                   if (nowtok == tok_else)
4021                     collate->else_action = else_seen;
4022                   else if (nowtok == tok_elifdef)
4023                     {
4024                       nowtok = tok_ifdef;
4025                       goto found_ifdef;
4026                     }
4027                   else if (nowtok == tok_elifndef)
4028                     {
4029                       nowtok = tok_ifndef;
4030                       goto found_ifdef;
4031                     }
4032                   else if (nowtok == tok_eof)
4033                     goto seen_eof;
4034                   else if (nowtok == tok_end)
4035                     goto seen_end;
4036                 }
4037             }
4038           else
4039             {
4040               /* XXX Should it really become necessary to support nested
4041                  preprocessor handling we will push the state here.  */
4042               lr_error (ldfile, _("%s: nested conditionals not supported"),
4043                         "LC_COLLATE");
4044               nowtok = skip_to (ldfile, collate, charmap, 1);
4045               if (nowtok == tok_eof)
4046                 goto seen_eof;
4047               else if (nowtok == tok_end)
4048                 goto seen_end;
4049             }
4050           break;
4051
4052         case tok_elifdef:
4053         case tok_elifndef:
4054         case tok_else:
4055           if (ignore_content)
4056             {
4057               lr_ignore_rest (ldfile, 0);
4058               break;
4059             }
4060
4061           lr_ignore_rest (ldfile, 1);
4062
4063           if (collate->else_action == else_ignore)
4064             {
4065               /* Ignore everything until the endif.  */
4066               nowtok = skip_to (ldfile, collate, charmap, 1);
4067               if (nowtok == tok_eof)
4068                 goto seen_eof;
4069               else if (nowtok == tok_end)
4070                 goto seen_end;
4071             }
4072           else
4073             {
4074               assert (collate->else_action == else_none);
4075               lr_error (ldfile, _("\
4076 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4077                         nowtok == tok_else ? "else"
4078                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
4079             }
4080           break;
4081
4082         case tok_endif:
4083           if (ignore_content)
4084             {
4085               lr_ignore_rest (ldfile, 0);
4086               break;
4087             }
4088
4089           lr_ignore_rest (ldfile, 1);
4090
4091           if (collate->else_action != else_ignore
4092               && collate->else_action != else_seen)
4093             lr_error (ldfile, _("\
4094 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4095
4096           /* XXX If we support nested preprocessor directives we pop
4097              the state here.  */
4098           collate->else_action = else_none;
4099           break;
4100
4101         default:
4102         err_label:
4103           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4104         }
4105
4106       /* Prepare for the next round.  */
4107       now = lr_token (ldfile, charmap, result, NULL, verbose);
4108       nowtok = now->tok;
4109     }
4110
4111  seen_eof:
4112   /* When we come here we reached the end of the file.  */
4113   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
4114 }