locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <errno.h>
  24 #include <error.h>
  25 #include <stdlib.h>
  26 #include <wchar.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
  40 #define obstack_chunk_alloc malloc  /* Chunk allocation hook; presumably consumed by the obstack macros -- confirm.  */
  41 #define obstack_chunk_free free  /* Matching chunk release hook.  */
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 __attribute ((always_inline))
  55 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  56 {
  57   if (sizeof (int32_t) == sizeof (int))
  58     obstack_int_grow_fast (obstack, data);
  59   else
  60     obstack_grow (obstack, &data, sizeof (int32_t));
  61 }
  62
  63 /* Forward declaration; struct element_t is defined further down in
        this file.  */
  64 struct element_t;
  65
  66 /* One section of the collation order.  A section is a member of two
        independent singly linked lists (DEF_NEXT and NEXT) and records the
        first and last element that belong to it together with the sorting
        rules that apply to it.  */
  67 struct section_list
  68 {
  69   /* Successor in the known_sections list.  */
  70   struct section_list *def_next;
  71   /* Successor in the sections list.  */
  72   struct section_list *next;
  73   /* Name of the section.  */
  74   const char *name;
  75   /* First element of this section.  */
  76   struct element_t *first;
  77   /* Last element of this section.  */
  78   struct element_t *last;
  79   /* These are the rules for this section (the array is assigned by
          read_directions).  */
  80   enum coll_sort_rule *rules;
  81   /* Index of the rule set in the appropriate section of the output file.  */
  82   int ruleidx;
  83 };
  84
  85 struct element_t;  /* Repeats the forward declaration made above.  */
  86
     /* The weight of an element for one level: an array of W[0..CNT-1]
        element pointers.  insert_weights stores a null pointer for an
        ignored level and ELEMENT_ELLIPSIS2 as placeholder on ellipsis
        lines.  */
  87 struct element_list_t
  88 {
  89   /* Number of elements.  */
  90   int cnt;
  91
       /* The CNT element pointers.  */
  92   struct element_t **w;
  93 };
  94
  95 /* Data type for collating element.  Objects are created by new_element
        and linked into the order list by insert_weights.  */
  96 struct element_t
  97 {
       /* Name of the element; NULL if it was created without a name.  */
  98   const char *name;
  99
       /* Multibyte representation (NUL-terminated copy) and its length;
          NULL and 0 if there is none.  */
 100   const char *mbs;
 101   size_t nmbs;
       /* Wide character representation (zero-terminated copy) and its
          length; NULL and 0 if there is none.  */
 102   const uint32_t *wcs;
 103   size_t nwcs;
       /* Initialized to NULL and 0 by new_element.  NOTE(review): presumably
          the positions in the multibyte and wide character output --
          confirm against the code that assigns them.  */
 104   int *mborder;
 105   int wcorder;
 106
 107   /* The following is a bit mask which bits are set if this element is
 108      used in the appropriate level.  Interesting for the singlebyte
 109      weight computation.
 110
 111      XXX The type here restricts the number of levels to 32.  It could
 112      be changed if necessary but I doubt this is necessary.  */
 113   unsigned int used_in_level;
 114
       /* Array with one entry per sorting rule (nrules entries), allocated
          by insert_weights; NULL until then.  */
 115   struct element_list_t *weights;
 116
 117   /* Nonzero if this is a real character definition.  */
 118   int is_character;
 119
 120   /* Order of the character in the sequence.  This information will
 121      be used in range expressions.  */
 122   int mbseqorder;
 123   int wcseqorder;
 124
 125   /* Where does the definition come from.  */
 126   const char *file;
 127   size_t line;
 128
 129   /* Which section does this belong to.  */
 130   struct section_list *section;
 131
 132   /* Predecessor and successor in the order list.  */
 133   struct element_t *last;
 134   struct element_t *next;
 135
 136   /* Next element in multibyte output list.  */
 137   struct element_t *mbnext;
 138   struct element_t *mblast;
 139
 140   /* Next element in wide character output list.  */
 141   struct element_t *wcnext;
 142   struct element_t *wclast;
 143 };
 144
 145 /* Special element values.  These are small integers cast to element
        pointers, used as markers instead of the address of a real element.
        insert_weights stores ELEMENT_ELLIPSIS2 as the placeholder weight on
        lines that use an ellipsis.  */
 146 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 147 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 148 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 149
 150 /* Data type for collating symbol.  Objects are created by new_symbol.  */
 151 struct symbol_t
 152 {
       /* Name of the symbol (NUL-terminated copy made by new_symbol).  */
 153   const char *name;
 154
 155   /* Point to place in the order list.  NULL until an element has been
          created for the symbol (see find_element).  */
 156   struct element_t *order;
 157
 158   /* Where does the definition come from.  */
 159   const char *file;
 160   size_t line;
 161 };
 162
 163 /* Sparse table of struct element_t *.  "3level.h" is included once per
        table after TABLE, ELEMENT and DEFAULT have been defined.
        NOTE(review): presumably the header defines struct TABLE and its
        access functions and undefines the parameter macros again; ITERATE
        and NO_FINALIZE presumably select optional parts -- confirm against
        3level.h.  */
 164 #define TABLE wchead_table
 165 #define ELEMENT struct element_t *
 166 #define DEFAULT NULL
 167 #define ITERATE
 168 #define NO_FINALIZE
 169 #include "3level.h"
 170
 171 /* Sparse table of int32_t; the default value is 0.  */
 172 #define TABLE collidx_table
 173 #define ELEMENT int32_t
 174 #define DEFAULT 0
 175 #include "3level.h"
 176
 177 /* Sparse table of uint32_t; the default value has all bits set.  */
 178 #define TABLE collseq_table
 179 #define ELEMENT uint32_t
 180 #define DEFAULT ~((uint32_t) 0)
 181 #include "3level.h"
 182
 183
 184 /* The real definition of the struct for the LC_COLLATE locale.  All
        elements, symbols and sections created by the functions in this file
        are allocated from MEMPOOL.  */
 185 struct locale_collate_t
 186 {
 187   int col_weight_max;
 188   int cur_weight_max;
 189
 190   /* List of known scripts.  */
 191   struct section_list *known_sections;
 192   /* List of used sections.  */
 193   struct section_list *sections;
 194   /* Current section using definition.  */
 195   struct section_list *current_section;
 196   /* There always can be an unnamed section.  */
 197   struct section_list unnamed_section;
 198   /* To make handling of errors easier we have another section.  */
 199   struct section_list error_section;
 200   /* Sometimes we are defining the values for collating symbols before
 201      the first actual section.  */
 202   struct section_list symbol_section;
 203
 204   /* Start of the order list.  */
 205   struct element_t *start;
 206
 207   /* The undefined element.  */
 208   struct element_t undefined;
 209
 210   /* This is the cursor for `reorder_after' insertions.  insert_weights
          links each new element in after the cursor and then moves the
          cursor to that element.  */
 211   struct element_t *cursor;
 212
 213   /* This value is used when handling ellipsis.  */
 214   struct element_t ellipsis_weight;
 215
 216   /* Known collating elements.  */
 217   hash_table elem_table;
 218
 219   /* Known collating symbols.  */
 220   hash_table sym_table;
 221
 222   /* Known collation sequences.  */
 223   hash_table seq_table;
 224
 225   struct obstack mempool;
 226
 227   /* The LC_COLLATE category is a bit special as it is sometimes possible
 228      that the definitions from more than one input file contains information.
 229      Therefore we keep all relevant input in a list.  */
 230   struct locale_collate_t *next;
 231
 232   /* Arrays with heads of the list for each of the leading bytes in
 233      the multibyte sequences.  */
 234   struct element_t *mbheads[256];
 235
 236   /* Sparse table with the heads of the element lists for the wide
 237      character output (presumably indexed by wide character -- confirm).  */
 238   struct wchead_table wcheads;
 239
 240   /* The arrays with the collation sequence order.  */
 241   unsigned char mbseqorder[256];
 242   struct collseq_table wcseqorder;
 243 };
 244
 245
 246 /* We have a few global variables which are used for reading all
 247    LC_COLLATE category descriptions in all files.
        NRULES is the number of sorting rules.  It is zero until
        read_directions has processed the first direction specification;
        every later specification is checked against it, and each element
        gets NRULES weight entries.  */
 248 static uint32_t nrules;
 249
 250
 251 /* We need UTF-8 encoding of numbers.  */
 252 static inline int
 253 __attribute ((always_inline))
 254 utf8_encode (char *buf, int val)
 255 {
 256   int retval;
 257
 258   if (val < 0x80)
 259     {
 260       *buf++ = (char) val;
 261       retval = 1;
 262     }
 263   else
 264     {
 265       int step;
 266
 267       for (step = 2; step < 6; ++step)
 268         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 269           break;
 270       retval = step;
 271
 272       *buf = (unsigned char) (~0xff >> step);
 273       --step;
 274       do
 275         {
 276           buf[step] = 0x80 | (val & 0x3f);
 277           val >>= 6;
 278         }
 279       while (--step > 0);
 280       *buf |= val;
 281     }
 282
 283   return retval;
 284 }
 285
 286
 287 static struct section_list *
 288 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 289                    struct section_list *next)
 290 {
 291   struct section_list *newp;
 292
 293   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 294                                                 sizeof (*newp));
 295   newp->next = next;
 296   newp->name = string;
 297   newp->first = NULL;
 298   newp->last = NULL;
 299
 300   return newp;
 301 }
 302
 303
 304 static struct element_t *
 305 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 306              const uint32_t *wcs, const char *name, size_t namelen,
 307              int is_character)
 308 {
 309   struct element_t *newp;
 310
 311   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 312                                              sizeof (*newp));
 313   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 314                                                     name, namelen);
 315   if (mbs != NULL)
 316     {
 317       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 318       newp->nmbs = mbslen;
 319     }
 320   else
 321     {
 322       newp->mbs = NULL;
 323       newp->nmbs = 0;
 324     }
 325   if (wcs != NULL)
 326     {
 327       size_t nwcs = wcslen ((wchar_t *) wcs);
 328       uint32_t zero = 0;
 329       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 330       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 331       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 332       newp->nwcs = nwcs;
 333     }
 334   else
 335     {
 336       newp->wcs = NULL;
 337       newp->nwcs = 0;
 338     }
 339   newp->mborder = NULL;
 340   newp->wcorder = 0;
 341   newp->used_in_level = 0;
 342   newp->is_character = is_character;
 343
 344   /* Will be assigned later.  XXX  */
 345   newp->mbseqorder = 0;
 346   newp->wcseqorder = 0;
 347
 348   /* Will be allocated later.  */
 349   newp->weights = NULL;
 350
 351   newp->file = NULL;
 352   newp->line = 0;
 353
 354   newp->section = collate->current_section;
 355
 356   newp->last = NULL;
 357   newp->next = NULL;
 358
 359   newp->mbnext = NULL;
 360   newp->mblast = NULL;
 361
 362   newp->wcnext = NULL;
 363   newp->wclast = NULL;
 364
 365   return newp;
 366 }
 367
 368
 369 static struct symbol_t *
 370 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 371 {
 372   struct symbol_t *newp;
 373
 374   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 375
 376   newp->name = obstack_copy0 (&collate->mempool, name, len);
 377   newp->order = NULL;
 378
 379   newp->file = NULL;
 380   newp->line = 0;
 381
 382   return newp;
 383 }
 384
 385
 386 /* Test whether this name is already defined somewhere.  */
 387 static int
 388 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 389                  const struct charmap_t *charmap,
 390                  struct repertoire_t *repertoire, const char *symbol,
 391                  size_t symbol_len)
 392 {
 393   void *ignore = NULL;
 394
 395   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 396     {
 397       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 398                 (int) symbol_len, symbol);
 399       return 1;
 400     }
 401
 402   if (repertoire != NULL
 403       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 404           == 0))
 405     {
 406       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 407                 (int) symbol_len, symbol);
 408       return 1;
 409     }
 410
 411   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 412     {
 413       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 414                 (int) symbol_len, symbol);
 415       return 1;
 416     }
 417
 418   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 419     {
 420       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 421                 (int) symbol_len, symbol);
 422       return 1;
 423     }
 424
 425   return 0;
 426 }
 427
 428
 429 /* Read the direction specification.  */
 430 static void
 431 read_directions (struct linereader *ldfile, struct token *arg,
 432                  const struct charmap_t *charmap,
 433                  struct repertoire_t *repertoire, struct localedef_t *result)
 434 {
 435   int cnt = 0;
 436   int max = nrules ?: 10;
 437   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 438   int warned = 0;
 439   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 440
 441   while (1)
 442     {
 443       int valid = 0;
 444
 445       if (arg->tok == tok_forward)
 446         {
 447           if (rules[cnt] & sort_backward)
 448             {
 449               if (! warned)
 450                 {
 451                   lr_error (ldfile, _("\
 452 %s: `forward' and `backward' are mutually excluding each other"),
 453                             "LC_COLLATE");
 454                   warned = 1;
 455                 }
 456             }
 457           else if (rules[cnt] & sort_forward)
 458             {
 459               if (! warned)
 460                 {
 461                   lr_error (ldfile, _("\
 462 %s: `%s' mentioned more than once in definition of weight %d"),
 463                             "LC_COLLATE", "forward", cnt + 1);
 464                 }
 465             }
 466           else
 467             rules[cnt] |= sort_forward;
 468
 469           valid = 1;
 470         }
 471       else if (arg->tok == tok_backward)
 472         {
 473           if (rules[cnt] & sort_forward)
 474             {
 475               if (! warned)
 476                 {
 477                   lr_error (ldfile, _("\
 478 %s: `forward' and `backward' are mutually excluding each other"),
 479                             "LC_COLLATE");
 480                   warned = 1;
 481                 }
 482             }
 483           else if (rules[cnt] & sort_backward)
 484             {
 485               if (! warned)
 486                 {
 487                   lr_error (ldfile, _("\
 488 %s: `%s' mentioned more than once in definition of weight %d"),
 489                             "LC_COLLATE", "backward", cnt + 1);
 490                 }
 491             }
 492           else
 493             rules[cnt] |= sort_backward;
 494
 495           valid = 1;
 496         }
 497       else if (arg->tok == tok_position)
 498         {
 499           if (rules[cnt] & sort_position)
 500             {
 501               if (! warned)
 502                 {
 503                   lr_error (ldfile, _("\
 504 %s: `%s' mentioned more than once in definition of weight %d"),
 505                             "LC_COLLATE", "position", cnt + 1);
 506                 }
 507             }
 508           else
 509             rules[cnt] |= sort_position;
 510
 511           valid = 1;
 512         }
 513
 514       if (valid)
 515         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 516
 517       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 518           || arg->tok == tok_semicolon)
 519         {
 520           if (! valid && ! warned)
 521             {
 522               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 523               warned = 1;
 524             }
 525
 526           /* See whether we have to increment the counter.  */
 527           if (arg->tok != tok_comma && rules[cnt] != 0)
 528             {
 529               /* Add the default `forward' if we have seen only `position'.  */
 530               if (rules[cnt] == sort_position)
 531                 rules[cnt] = sort_position | sort_forward;
 532
 533               ++cnt;
 534             }
 535
 536           if (arg->tok == tok_eof || arg->tok == tok_eol)
 537             /* End of line or file, so we exit the loop.  */
 538             break;
 539
 540           if (nrules == 0)
 541             {
 542               /* See whether we have enough room in the array.  */
 543               if (cnt == max)
 544                 {
 545                   max += 10;
 546                   rules = (enum coll_sort_rule *) xrealloc (rules,
 547                                                             max
 548                                                             * sizeof (*rules));
 549                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 550                 }
 551             }
 552           else
 553             {
 554               if (cnt == nrules)
 555                 {
 556                   /* There must not be any more rule.  */
 557                   if (! warned)
 558                     {
 559                       lr_error (ldfile, _("\
 560 %s: too many rules; first entry only had %d"),
 561                                 "LC_COLLATE", nrules);
 562                       warned = 1;
 563                     }
 564
 565                   lr_ignore_rest (ldfile, 0);
 566                   break;
 567                 }
 568             }
 569         }
 570       else
 571         {
 572           if (! warned)
 573             {
 574               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 575               warned = 1;
 576             }
 577         }
 578
 579       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 580     }
 581
 582   if (nrules == 0)
 583     {
 584       /* Now we know how many rules we have.  */
 585       nrules = cnt;
 586       rules = (enum coll_sort_rule *) xrealloc (rules,
 587                                                 nrules * sizeof (*rules));
 588     }
 589   else
 590     {
 591       if (cnt < nrules)
 592         {
 593           /* Not enough rules in this specification.  */
 594           if (! warned)
 595             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 596
 597           do
 598             rules[cnt] = sort_forward;
 599           while (++cnt < nrules);
 600         }
 601     }
 602
 603   collate->current_section->rules = rules;
 604 }
 605
 606
 607 static struct element_t *
 608 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 609               const char *str, size_t len)
 610 {
 611   void *result = NULL;
 612
 613   /* Search for the entries among the collation sequences already define.  */
 614   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 615     {
 616       /* Nope, not define yet.  So we see whether it is a
 617          collation symbol.  */
 618       void *ptr;
 619
 620       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 621         {
 622           /* It's a collation symbol.  */
 623           struct symbol_t *sym = (struct symbol_t *) ptr;
 624           result = sym->order;
 625
 626           if (result == NULL)
 627             result = sym->order = new_element (collate, NULL, 0, NULL,
 628                                                NULL, 0, 0);
 629         }
 630       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 631         {
 632           /* It's also no collation element.  So it is a character
 633              element defined later.  */
 634           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 635           /* Insert it into the sequence table.  */
 636           insert_entry (&collate->seq_table, str, len, result);
 637         }
 638     }
 639
 640   return (struct element_t *) result;
 641 }
 642
 643
 644 static void
 645 unlink_element (struct locale_collate_t *collate)
 646 {
 647   if (collate->cursor == collate->start)
 648     {
 649       assert (collate->cursor->next == NULL);
 650       assert (collate->cursor->last == NULL);
 651       collate->cursor = NULL;
 652     }
 653   else
 654     {
 655       if (collate->cursor->next != NULL)
 656         collate->cursor->next->last = collate->cursor->last;
 657       if (collate->cursor->last != NULL)
 658         collate->cursor->last->next = collate->cursor->next;
 659       collate->cursor = collate->cursor->last;
 660     }
 661 }
 662
 663
 664 static void
 665 insert_weights (struct linereader *ldfile, struct element_t *elem,
 666                 const struct charmap_t *charmap,
 667                 struct repertoire_t *repertoire, struct localedef_t *result,
 668                 enum token_t ellipsis)
 669 {
 670   int weight_cnt;
 671   struct token *arg;
 672   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 673
 674   /* Initialize all the fields.  */
 675   elem->file = ldfile->fname;
 676   elem->line = ldfile->lineno;
 677
 678   elem->last = collate->cursor;
 679   elem->next = collate->cursor ? collate->cursor->next : NULL;
 680   if (collate->cursor != NULL && collate->cursor->next != NULL)
 681     collate->cursor->next->last = elem;
 682   if (collate->cursor != NULL)
 683     collate->cursor->next = elem;
 684   if (collate->start == NULL)
 685     {
 686       assert (collate->cursor == NULL);
 687       collate->start = elem;
 688     }
 689
 690   elem->section = collate->current_section;
 691
 692   if (collate->current_section->first == NULL)
 693     collate->current_section->first = elem;
 694   if (collate->current_section->last == collate->cursor)
 695     collate->current_section->last = elem;
 696
 697   collate->cursor = elem;
 698
 699   elem->weights = (struct element_list_t *)
 700     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 701   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 702
 703   weight_cnt = 0;
 704
 705   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 706   do
 707     {
 708       if (arg->tok == tok_eof || arg->tok == tok_eol)
 709         break;
 710
 711       if (arg->tok == tok_ignore)
 712         {
 713           /* The weight for this level has to be ignored.  We use the
 714              null pointer to indicate this.  */
 715           elem->weights[weight_cnt].w = (struct element_t **)
 716             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 717           elem->weights[weight_cnt].w[0] = NULL;
 718           elem->weights[weight_cnt].cnt = 1;
 719         }
 720       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 721         {
 722           char ucs4str[10];
 723           struct element_t *val;
 724           char *symstr;
 725           size_t symlen;
 726
 727           if (arg->tok == tok_bsymbol)
 728             {
 729               symstr = arg->val.str.startmb;
 730               symlen = arg->val.str.lenmb;
 731             }
 732           else
 733             {
 734               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 735               symstr = ucs4str;
 736               symlen = 9;
 737             }
 738
 739           val = find_element (ldfile, collate, symstr, symlen);
 740           if (val == NULL)
 741             break;
 742
 743           elem->weights[weight_cnt].w = (struct element_t **)
 744             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 745           elem->weights[weight_cnt].w[0] = val;
 746           elem->weights[weight_cnt].cnt = 1;
 747         }
 748       else if (arg->tok == tok_string)
 749         {
 750           /* Split the string up in the individual characters and put
 751              the element definitions in the list.  */
 752           const char *cp = arg->val.str.startmb;
 753           int cnt = 0;
 754           struct element_t *charelem;
 755           struct element_t **weights = NULL;
 756           int max = 0;
 757
 758           if (*cp == '\0')
 759             {
 760               lr_error (ldfile, _("%s: empty weight string not allowed"),
 761                         "LC_COLLATE");
 762               lr_ignore_rest (ldfile, 0);
 763               break;
 764             }
 765
 766           do
 767             {
 768               if (*cp == '<')
 769                 {
 770                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 771                      the latter we have to unify the name.  */
 772                   const char *startp = ++cp;
 773                   size_t len;
 774
 775                   while (*cp != '>')
 776                     {
 777                       if (*cp == ldfile->escape_char)
 778                         ++cp;
 779                       if (*cp == '\0')
 780                         /* It's a syntax error.  */
 781                         goto syntax;
 782
 783                       ++cp;
 784                     }
 785
 786                   if (cp - startp == 5 && startp[0] == 'U'
 787                       && isxdigit (startp[1]) && isxdigit (startp[2])
 788                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 789                     {
 790                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 791                       char *newstr;
 792
 793                       newstr = (char *) xmalloc (10);
 794                       snprintf (newstr, 10, "U%08X", ucs4);
 795                       startp = newstr;
 796
 797                       len = 9;
 798                     }
 799                   else
 800                     len = cp - startp;
 801
 802                   charelem = find_element (ldfile, collate, startp, len);
 803                   ++cp;
 804                 }
 805               else
 806                 {
 807                   /* People really shouldn't use characters directly in
 808                      the string.  Especially since it's not really clear
 809                      what this means.  We interpret all characters in the
 810                      string as if that would be bsymbols.  Otherwise we
 811                      would have to match back to bsymbols somehow and this
 812                      is normally not what people normally expect.  */
 813                   charelem = find_element (ldfile, collate, cp++, 1);
 814                 }
 815
 816               if (charelem == NULL)
 817                 {
 818                   /* We ignore the rest of the line.  */
 819                   lr_ignore_rest (ldfile, 0);
 820                   break;
 821                 }
 822
 823               /* Add the pointer.  */
 824               if (cnt >= max)
 825                 {
 826                   struct element_t **newp;
 827                   max += 10;
 828                   newp = (struct element_t **)
 829                     alloca (max * sizeof (struct element_t *));
 830                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 831                   weights = newp;
 832                 }
 833               weights[cnt++] = charelem;
 834             }
 835           while (*cp != '\0');
 836
 837           /* Now store the information.  */
 838           elem->weights[weight_cnt].w = (struct element_t **)
 839             obstack_alloc (&collate->mempool,
 840                            cnt * sizeof (struct element_t *));
 841           memcpy (elem->weights[weight_cnt].w, weights,
 842                   cnt * sizeof (struct element_t *));
 843           elem->weights[weight_cnt].cnt = cnt;
 844
 845           /* We don't need the string anymore.  */
 846           free (arg->val.str.startmb);
 847         }
 848       else if (ellipsis != tok_none
 849                && (arg->tok == tok_ellipsis2
 850                    || arg->tok == tok_ellipsis3
 851                    || arg->tok == tok_ellipsis4))
 852         {
 853           /* It must be the same ellipsis as used in the initial column.  */
 854           if (arg->tok != ellipsis)
 855             lr_error (ldfile, _("\
 856 %s: weights must use the same ellipsis symbol as the name"),
 857                       "LC_COLLATE");
 858
 859           /* The weight for this level will depend on the element
 860              iterating over the range.  Put a placeholder.  */
 861           elem->weights[weight_cnt].w = (struct element_t **)
 862             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 863           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 864           elem->weights[weight_cnt].cnt = 1;
 865         }
 866       else
 867         {
 868         syntax:
 869           /* It's a syntax error.  */
 870           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 871           lr_ignore_rest (ldfile, 0);
 872           break;
 873         }
 874
 875       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 876       /* This better should be the end of the line or a semicolon.  */
 877       if (arg->tok == tok_semicolon)
 878         /* OK, ignore this and read the next token.  */
 879         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 880       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 881         {
 882           /* It's a syntax error.  */
 883           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 884           lr_ignore_rest (ldfile, 0);
 885           break;
 886         }
 887     }
 888   while (++weight_cnt < nrules);
 889
 890   if (weight_cnt < nrules)
 891     {
 892       /* This means the rest of the line uses the current element as
 893          the weight.  */
 894       do
 895         {
 896           elem->weights[weight_cnt].w = (struct element_t **)
 897             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 898           if (ellipsis == tok_none)
 899             elem->weights[weight_cnt].w[0] = elem;
 900           else
 901             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 902           elem->weights[weight_cnt].cnt = 1;
 903         }
 904       while (++weight_cnt < nrules);
 905     }
 906   else
 907     {
 908       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 909         {
 910           /* Too many rule values.  */
 911           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 912           lr_ignore_rest (ldfile, 0);
 913         }
 914       else
 915         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 916     }
 917 }
 918
 919
 920 static int
 921 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 922               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 923               struct localedef_t *result)
 924 {
 925   /* First find out what kind of symbol this is.  */
 926   struct charseq *seq;
 927   uint32_t wc;
 928   struct element_t *elem = NULL;
 929   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 930
 931   /* Try to find the character in the charmap.  */
 932   seq = charmap_find_value (charmap, symstr, symlen);
 933
 934   /* Determine the wide character.  */
 935   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 936     {
 937       wc = repertoire_find_value (repertoire, symstr, symlen);
 938       if (seq != NULL)
 939         seq->ucs4 = wc;
 940     }
 941   else
 942     wc = seq->ucs4;
 943
 944   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 945     {
 946       /* It's no character, so look through the collation elements and
 947          symbol list.  */
 948       void *ptr = elem;
 949       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 950         {
 951           void *result;
 952           struct symbol_t *sym = NULL;
 953
 954           /* It's also collation element.  Therefore it's either a
 955              collating symbol or it's a character which is not
 956              supported by the character set.  In the later case we
 957              simply create a dummy entry.  */
 958           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 959             {
 960               /* It's a collation symbol.  */
 961               sym = (struct symbol_t *) result;
 962
 963               elem = sym->order;
 964             }
 965
 966           if (elem == NULL)
 967             {
 968               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 969
 970               if (sym != NULL)
 971                 sym->order = elem;
 972               else
 973                 /* Enter a fake element in the sequence table.  This
 974                    won't cause anything in the output since there is
 975                    no multibyte or wide character associated with
 976                    it.  */
 977                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 978             }
 979         }
 980       else
 981         /* Copy the result back.  */
 982         elem = ptr;
 983     }
 984   else
 985     {
 986       /* Otherwise the symbols stands for a character.  */
 987       void *ptr = elem;
 988       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
 989         {
 990           uint32_t wcs[2] = { wc, 0 };
 991
 992           /* We have to allocate an entry.  */
 993           elem = new_element (collate,
 994                               seq != NULL ? (char *) seq->bytes : NULL,
 995                               seq != NULL ? seq->nbytes : 0,
 996                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 997                               symstr, symlen, 1);
 998
 999           /* And add it to the table.  */
1000           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1001             /* This cannot happen.  */
1002             assert (! "Internal error");
1003         }
1004       else
1005         {
1006           /* Copy the result back.  */
1007           elem = ptr;
1008
1009           /* Maybe the character was used before the definition.  In this case
1010              we have to insert the byte sequences now.  */
1011           if (elem->mbs == NULL && seq != NULL)
1012             {
1013               elem->mbs = obstack_copy0 (&collate->mempool,
1014                                          seq->bytes, seq->nbytes);
1015               elem->nmbs = seq->nbytes;
1016             }
1017
1018           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1019             {
1020               uint32_t wcs[2] = { wc, 0 };
1021
1022               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1023               elem->nwcs = 1;
1024             }
1025         }
1026     }
1027
1028   /* Test whether this element is not already in the list.  */
1029   if (elem->next != NULL || elem == collate->cursor)
1030     {
1031       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1032                 (int) symlen, symstr, elem->file, elem->line);
1033       lr_ignore_rest (ldfile, 0);
1034       return 1;
1035     }
1036
1037   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1038
1039   return 0;
1040 }
1041
1042
1043 static void
1044 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1045                  enum token_t ellipsis, const struct charmap_t *charmap,
1046                  struct repertoire_t *repertoire,
1047                  struct localedef_t *result)
1048 {
1049   struct element_t *startp;
1050   struct element_t *endp;
1051   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1052
1053   /* Unlink the entry added for the ellipsis.  */
1054   unlink_element (collate);
1055   startp = collate->cursor;
1056
1057   /* Process and add the end-entry.  */
1058   if (symstr != NULL
1059       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1060     /* Something went wrong with inserting the to-value.  This means
1061        we cannot process the ellipsis.  */
1062     return;
1063
1064   /* Reset the cursor.  */
1065   collate->cursor = startp;
1066
1067   /* Now we have to handle many different situations:
1068      - we have to distinguish between the three different ellipsis forms
1069      - the is the ellipsis at the beginning, in the middle, or at the end.
1070   */
1071   endp = collate->cursor->next;
1072   assert (symstr == NULL || endp != NULL);
1073
1074   /* XXX The following is probably very wrong since also collating symbols
1075      can appear in ranges.  But do we want/can refine the test for that?  */
1076 #if 0
1077   /* Both, the start and the end symbol, must stand for characters.  */
1078   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1079       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1080     {
1081       lr_error (ldfile, _("\
1082 %s: the start and the end symbol of a range must stand for characters"),
1083                 "LC_COLLATE");
1084       return;
1085     }
1086 #endif
1087
1088   if (ellipsis == tok_ellipsis3)
1089     {
1090       /* One requirement we make here: the length of the byte
1091          sequences for the first and end character must be the same.
1092          This is mainly to prevent unwanted effects and this is often
1093          not what is wanted.  */
1094       size_t len = (startp->mbs != NULL ? startp->nmbs
1095                     : (endp->mbs != NULL ? endp->nmbs : 0));
1096       char mbcnt[len + 1];
1097       char mbend[len + 1];
1098
1099       /* Well, this should be caught somewhere else already.  Just to
1100          make sure.  */
1101       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1102       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1103
1104       if (startp != NULL && endp != NULL
1105           && startp->mbs != NULL && endp->mbs != NULL
1106           && startp->nmbs != endp->nmbs)
1107         {
1108           lr_error (ldfile, _("\
1109 %s: byte sequences of first and last character must have the same length"),
1110                     "LC_COLLATE");
1111           return;
1112         }
1113
1114       /* Determine whether we have to generate multibyte sequences.  */
1115       if ((startp == NULL || startp->mbs != NULL)
1116           && (endp == NULL || endp->mbs != NULL))
1117         {
1118           int cnt;
1119           int ret;
1120
1121           /* Prepare the beginning byte sequence.  This is either from the
1122              beginning byte sequence or it is all nulls if it was an
1123              initial ellipsis.  */
1124           if (startp == NULL || startp->mbs == NULL)
1125             memset (mbcnt, '\0', len);
1126           else
1127             {
1128               memcpy (mbcnt, startp->mbs, len);
1129
1130               /* And increment it so that the value is the first one we will
1131                  try to insert.  */
1132               for (cnt = len - 1; cnt >= 0; --cnt)
1133                 if (++mbcnt[cnt] != '\0')
1134                   break;
1135             }
1136           mbcnt[len] = '\0';
1137
1138           /* And the end sequence.  */
1139           if (endp == NULL || endp->mbs == NULL)
1140             memset (mbend, '\0', len);
1141           else
1142             memcpy (mbend, endp->mbs, len);
1143           mbend[len] = '\0';
1144
1145           /* Test whether we have a correct range.  */
1146           ret = memcmp (mbcnt, mbend, len);
1147           if (ret >= 0)
1148             {
1149               if (ret > 0)
1150                 lr_error (ldfile, _("%s: byte sequence of first character of \
1151 range is not lower than that of the last character"), "LC_COLLATE");
1152               return;
1153             }
1154
1155           /* Generate the byte sequences data.  */
1156           while (1)
1157             {
1158               struct charseq *seq;
1159
1160               /* Quite a bit of work ahead.  We have to find the character
1161                  definition for the byte sequence and then determine the
1162                  wide character belonging to it.  */
1163               seq = charmap_find_symbol (charmap, mbcnt, len);
1164               if (seq != NULL)
1165                 {
1166                   struct element_t *elem;
1167                   size_t namelen;
1168
1169                   /* I don't think this can ever happen.  */
1170                   assert (seq->name != NULL);
1171                   namelen = strlen (seq->name);
1172
1173                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1174                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1175                                                        namelen);
1176
1177                   /* Now we are ready to insert the new value in the
1178                      sequence.  Find out whether the element is
1179                      already known.  */
1180                   void *ptr;
1181                   if (find_entry (&collate->seq_table, seq->name, namelen,
1182                                   &ptr) != 0)
1183                     {
1184                       uint32_t wcs[2] = { seq->ucs4, 0 };
1185
1186                       /* We have to allocate an entry.  */
1187                       elem = new_element (collate, mbcnt, len,
1188                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1189                                           ? NULL : wcs, seq->name,
1190                                           namelen, 1);
1191
1192                       /* And add it to the table.  */
1193                       if (insert_entry (&collate->seq_table, seq->name,
1194                                         namelen, elem) != 0)
1195                         /* This cannot happen.  */
1196                         assert (! "Internal error");
1197                     }
1198                   else
1199                     /* Copy the result.  */
1200                     elem = ptr;
1201
1202                   /* Test whether this element is not already in the list.  */
1203                   if (elem->next != NULL || (collate->cursor != NULL
1204                                              && elem->next == collate->cursor))
1205                     {
1206                       lr_error (ldfile, _("\
1207 order for `%.*s' already defined at %s:%Zu"),
1208                                 (int) namelen, seq->name,
1209                                 elem->file, elem->line);
1210                       goto increment;
1211                     }
1212
1213                   /* Enqueue the new element.  */
1214                   elem->last = collate->cursor;
1215                   if (collate->cursor == NULL)
1216                     elem->next = NULL;
1217                   else
1218                     {
1219                       elem->next = collate->cursor->next;
1220                       elem->last->next = elem;
1221                       if (elem->next != NULL)
1222                         elem->next->last = elem;
1223                     }
1224                   if (collate->start == NULL)
1225                     {
1226                       assert (collate->cursor == NULL);
1227                       collate->start = elem;
1228                     }
1229                   collate->cursor = elem;
1230
1231                  /* Add the weight value.  We take them from the
1232                     `ellipsis_weights' member of `collate'.  */
1233                   elem->weights = (struct element_list_t *)
1234                     obstack_alloc (&collate->mempool,
1235                                    nrules * sizeof (struct element_list_t));
1236                   for (cnt = 0; cnt < nrules; ++cnt)
1237                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1238                         && (collate->ellipsis_weight.weights[cnt].w[0]
1239                             == ELEMENT_ELLIPSIS2))
1240                       {
1241                         elem->weights[cnt].w = (struct element_t **)
1242                           obstack_alloc (&collate->mempool,
1243                                          sizeof (struct element_t *));
1244                         elem->weights[cnt].w[0] = elem;
1245                         elem->weights[cnt].cnt = 1;
1246                       }
1247                     else
1248                       {
1249                         /* Simply use the weight from `ellipsis_weight'.  */
1250                         elem->weights[cnt].w =
1251                           collate->ellipsis_weight.weights[cnt].w;
1252                         elem->weights[cnt].cnt =
1253                           collate->ellipsis_weight.weights[cnt].cnt;
1254                       }
1255                 }
1256
1257               /* Increment for the next round.  */
1258             increment:
1259               for (cnt = len - 1; cnt >= 0; --cnt)
1260                 if (++mbcnt[cnt] != '\0')
1261                   break;
1262
1263               /* Find out whether this was all.  */
1264               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1265                 /* Yep, that's all.  */
1266                 break;
1267             }
1268         }
1269     }
1270   else
1271     {
1272       /* For symbolic range we naturally must have a beginning and an
1273          end specified by the user.  */
1274       if (startp == NULL)
1275         lr_error (ldfile, _("\
1276 %s: symbolic range ellipsis must not directly follow `order_start'"),
1277                   "LC_COLLATE");
1278       else if (endp == NULL)
1279         lr_error (ldfile, _("\
1280 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1281                   "LC_COLLATE");
1282       else
1283         {
1284           /* Determine the range.  To do so we have to determine the
1285              common prefix of the both names and then the numeric
1286              values of both ends.  */
1287           size_t lenfrom = strlen (startp->name);
1288           size_t lento = strlen (endp->name);
1289           char buf[lento + 1];
1290           int preflen = 0;
1291           long int from;
1292           long int to;
1293           char *cp;
1294           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1295
1296           if (lenfrom != lento)
1297             {
1298             invalid_range:
1299               lr_error (ldfile, _("\
1300 `%s' and `%.*s' are not valid names for symbolic range"),
1301                         startp->name, (int) lento, endp->name);
1302               return;
1303             }
1304
1305           while (startp->name[preflen] == endp->name[preflen])
1306             if (startp->name[preflen] == '\0')
1307               /* Nothing to be done.  The start and end point are identical
1308                  and while inserting the end point we have already given
1309                  the user an error message.  */
1310               return;
1311             else
1312               ++preflen;
1313
1314           errno = 0;
1315           from = strtol (startp->name + preflen, &cp, base);
1316           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1317             goto invalid_range;
1318
1319           errno = 0;
1320           to = strtol (endp->name + preflen, &cp, base);
1321           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1322             goto invalid_range;
1323
1324           /* Copy the prefix.  */
1325           memcpy (buf, startp->name, preflen);
1326
1327           /* Loop over all values.  */
1328           for (++from; from < to; ++from)
1329             {
1330               struct element_t *elem = NULL;
1331               struct charseq *seq;
1332               uint32_t wc;
1333               int cnt;
1334
1335               /* Generate the name.  */
1336               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1337                        (int) (lenfrom - preflen), from);
1338
1339               /* Look whether this name is already defined.  */
1340               void *ptr;
1341               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1342                 {
1343                   /* Copy back the result.  */
1344                   elem = ptr;
1345
1346                   if (elem->next != NULL || (collate->cursor != NULL
1347                                              && elem->next == collate->cursor))
1348                     {
1349                       lr_error (ldfile, _("\
1350 %s: order for `%.*s' already defined at %s:%Zu"),
1351                                 "LC_COLLATE", (int) lenfrom, buf,
1352                                 elem->file, elem->line);
1353                       continue;
1354                     }
1355
1356                   if (elem->name == NULL)
1357                     {
1358                       lr_error (ldfile, _("%s: `%s' must be a character"),
1359                                 "LC_COLLATE", buf);
1360                       continue;
1361                     }
1362                 }
1363
1364               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1365                 {
1366                   /* Search for a character of this name.  */
1367                   seq = charmap_find_value (charmap, buf, lenfrom);
1368                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1369                     {
1370                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1371
1372                       if (seq != NULL)
1373                         seq->ucs4 = wc;
1374                     }
1375                   else
1376                     wc = seq->ucs4;
1377
1378                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1379                     /* We don't know anything about a character with this
1380                        name.  XXX Should we warn?  */
1381                     continue;
1382
1383                   if (elem == NULL)
1384                     {
1385                       uint32_t wcs[2] = { wc, 0 };
1386
1387                       /* We have to allocate an entry.  */
1388                       elem = new_element (collate,
1389                                           seq != NULL
1390                                           ? (char *) seq->bytes : NULL,
1391                                           seq != NULL ? seq->nbytes : 0,
1392                                           wc == ILLEGAL_CHAR_VALUE
1393                                           ? NULL : wcs, buf, lenfrom, 1);
1394                     }
1395                   else
1396                     {
1397                       /* Update the element.  */
1398                       if (seq != NULL)
1399                         {
1400                           elem->mbs = obstack_copy0 (&collate->mempool,
1401                                                      seq->bytes, seq->nbytes);
1402                           elem->nmbs = seq->nbytes;
1403                         }
1404
1405                       if (wc != ILLEGAL_CHAR_VALUE)
1406                         {
1407                           uint32_t zero = 0;
1408
1409                           obstack_grow (&collate->mempool,
1410                                         &wc, sizeof (uint32_t));
1411                           obstack_grow (&collate->mempool,
1412                                         &zero, sizeof (uint32_t));
1413                           elem->wcs = obstack_finish (&collate->mempool);
1414                           elem->nwcs = 1;
1415                         }
1416                     }
1417
1418                   elem->file = ldfile->fname;
1419                   elem->line = ldfile->lineno;
1420                   elem->section = collate->current_section;
1421                 }
1422
1423               /* Enqueue the new element.  */
1424               elem->last = collate->cursor;
1425               elem->next = collate->cursor->next;
1426               elem->last->next = elem;
1427               if (elem->next != NULL)
1428                 elem->next->last = elem;
1429               collate->cursor = elem;
1430
1431               /* Now add the weights.  They come from the `ellipsis_weights'
1432                  member of `collate'.  */
1433               elem->weights = (struct element_list_t *)
1434                 obstack_alloc (&collate->mempool,
1435                                nrules * sizeof (struct element_list_t));
1436               for (cnt = 0; cnt < nrules; ++cnt)
1437                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1438                     && (collate->ellipsis_weight.weights[cnt].w[0]
1439                         == ELEMENT_ELLIPSIS2))
1440                   {
1441                     elem->weights[cnt].w = (struct element_t **)
1442                       obstack_alloc (&collate->mempool,
1443                                      sizeof (struct element_t *));
1444                     elem->weights[cnt].w[0] = elem;
1445                     elem->weights[cnt].cnt = 1;
1446                   }
1447                 else
1448                   {
1449                     /* Simly use the weight from `ellipsis_weight'.  */
1450                     elem->weights[cnt].w =
1451                       collate->ellipsis_weight.weights[cnt].w;
1452                     elem->weights[cnt].cnt =
1453                       collate->ellipsis_weight.weights[cnt].cnt;
1454                   }
1455             }
1456         }
1457     }
1458 }
1459
1460
1461 static void
1462 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1463                  struct localedef_t *copy_locale, int ignore_content)
1464 {
1465   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1466     {
1467       struct locale_collate_t *collate;
1468
1469       if (copy_locale == NULL)
1470         {
1471           collate = locale->categories[LC_COLLATE].collate =
1472             (struct locale_collate_t *)
1473             xcalloc (1, sizeof (struct locale_collate_t));
1474
1475           /* Init the various data structures.  */
1476           init_hash (&collate->elem_table, 100);
1477           init_hash (&collate->sym_table, 100);
1478           init_hash (&collate->seq_table, 500);
1479           obstack_init (&collate->mempool);
1480
1481           collate->col_weight_max = -1;
1482         }
1483       else
1484         /* Reuse the copy_locale's data structures.  */
1485         collate = locale->categories[LC_COLLATE].collate =
1486           copy_locale->categories[LC_COLLATE].collate;
1487     }
1488
1489   ldfile->translate_strings = 0;
1490   ldfile->return_widestr = 0;
1491 }
1492
1493
1494 void
1495 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1496 {
1497   /* Now is the time when we can assign the individual collation
1498      values for all the symbols.  We have possibly different values
1499      for the wide- and the multibyte-character symbols.  This is done
1500      since it might make a difference in the encoding if there is in
1501      some cases no multibyte-character but there are wide-characters.
1502      (The other way around it is not important since theencoded
1503      collation value in the wide-character case is 32 bits wide and
1504      therefore requires no encoding).
1505
1506      The lowest collation value assigned is 2.  Zero is reserved for
1507      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1508      functions and 1 is used to separate the individual passes for the
1509      different rules.
1510
1511      We also have to construct is list with all the bytes/words which
1512      can come first in a sequence, followed by all the elements which
1513      also start with this byte/word.  The order is reverse which has
1514      among others the important effect that longer strings are located
1515      first in the list.  This is required for the output data since
1516      the algorithm used in `strcoll' etc depends on this.
1517
1518      The multibyte case is easy.  We simply sort into an array with
1519      256 elements.  */
1520   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1521   int mbact[nrules];
1522   int wcact;
1523   int mbseqact;
1524   int wcseqact;
1525   struct element_t *runp;
1526   int i;
1527   int need_undefined = 0;
1528   struct section_list *sect;
1529   int ruleidx;
1530   int nr_wide_elems = 0;
1531
1532   if (collate == NULL)
1533     {
1534       /* No data, no check.  */
1535       if (! be_quiet)
1536         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1537                                 "LC_COLLATE"));
1538       return;
1539     }
1540
1541   /* If this assertion is hit change the type in `element_t'.  */
1542   assert (nrules <= sizeof (runp->used_in_level) * 8);
1543
1544   /* Make sure that the `position' rule is used either in all sections
1545      or in none.  */
1546   for (i = 0; i < nrules; ++i)
1547     for (sect = collate->sections; sect != NULL; sect = sect->next)
1548       if (sect->rules != NULL
1549           && ((sect->rules[i] & sort_position)
1550               != (collate->sections->rules[i] & sort_position)))
1551         {
1552           WITH_CUR_LOCALE (error (0, 0, _("\
1553 %s: `position' must be used for a specific level in all sections or none"),
1554                                   "LC_COLLATE"));
1555           break;
1556         }
1557
1558   /* Find out which elements are used at which level.  At the same
1559      time we find out whether we have any undefined symbols.  */
1560   runp = collate->start;
1561   while (runp != NULL)
1562     {
1563       if (runp->mbs != NULL)
1564         {
1565           for (i = 0; i < nrules; ++i)
1566             {
1567               int j;
1568
1569               for (j = 0; j < runp->weights[i].cnt; ++j)
1570                 /* A NULL pointer as the weight means IGNORE.  */
1571                 if (runp->weights[i].w[j] != NULL)
1572                   {
1573                     if (runp->weights[i].w[j]->weights == NULL)
1574                       {
1575                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1576                                                         runp->line,
1577                                                         _("symbol `%s' not defined"),
1578                                                         runp->weights[i].w[j]->name));
1579
1580                         need_undefined = 1;
1581                         runp->weights[i].w[j] = &collate->undefined;
1582                       }
1583                     else
1584                       /* Set the bit for the level.  */
1585                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1586                   }
1587             }
1588         }
1589
1590       /* Up to the next entry.  */
1591       runp = runp->next;
1592     }
1593
1594   /* Walk through the list of defined sequences and assign weights.  Also
1595      create the data structure which will allow generating the single byte
1596      character based tables.
1597
1598      Since at each time only the weights for each of the rules are
1599      only compared to other weights for this rule it is possible to
1600      assign more compact weight values than simply counting all
1601      weights in sequence.  We can assign weights from 3, one for each
1602      rule individually and only for those elements, which are actually
1603      used for this rule.
1604
1605      Why is this important?  It is not for the wide char table.  But
1606      it is for the singlebyte output since here larger numbers have to
1607      be encoded to make it possible to emit the value as a byte
1608      string.  */
1609   for (i = 0; i < nrules; ++i)
1610     mbact[i] = 2;
1611   wcact = 2;
1612   mbseqact = 0;
1613   wcseqact = 0;
1614   runp = collate->start;
1615   while (runp != NULL)
1616     {
1617       /* Determine the order.  */
1618       if (runp->used_in_level != 0)
1619         {
1620           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1621                                                  nrules * sizeof (int));
1622
1623           for (i = 0; i < nrules; ++i)
1624             if ((runp->used_in_level & (1 << i)) != 0)
1625               runp->mborder[i] = mbact[i]++;
1626             else
1627               runp->mborder[i] = 0;
1628         }
1629
1630       if (runp->mbs != NULL)
1631         {
1632           struct element_t **eptr;
1633           struct element_t *lastp = NULL;
1634
1635           /* Find the point where to insert in the list.  */
1636           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1637           while (*eptr != NULL)
1638             {
1639               if ((*eptr)->nmbs < runp->nmbs)
1640                 break;
1641
1642               if ((*eptr)->nmbs == runp->nmbs)
1643                 {
1644                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1645
1646                   if (c == 0)
1647                     {
1648                       /* This should not happen.  It means that we have
1649                          to symbols with the same byte sequence.  It is
1650                          of course an error.  */
1651                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1652                                                       (*eptr)->line,
1653                                                       _("\
1654 symbol `%s' has the same encoding as"), (*eptr)->name);
1655                                        error_at_line (0, 0, runp->file,
1656                                                       runp->line,
1657                                                       _("symbol `%s'"),
1658                                                       runp->name));
1659                       goto dont_insert;
1660                     }
1661                   else if (c < 0)
1662                     /* Insert it here.  */
1663                     break;
1664                 }
1665
1666               /* To the next entry.  */
1667               lastp = *eptr;
1668               eptr = &(*eptr)->mbnext;
1669             }
1670
1671           /* Set the pointers.  */
1672           runp->mbnext = *eptr;
1673           runp->mblast = lastp;
1674           if (*eptr != NULL)
1675             (*eptr)->mblast = runp;
1676           *eptr = runp;
1677         dont_insert:
1678           ;
1679         }
1680
1681       if (runp->used_in_level)
1682         {
1683           runp->wcorder = wcact++;
1684
1685           /* We take the opportunity to count the elements which have
1686              wide characters.  */
1687           ++nr_wide_elems;
1688         }
1689
1690       if (runp->is_character)
1691         {
1692           if (runp->nmbs == 1)
1693             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1694
1695           runp->wcseqorder = wcseqact++;
1696         }
1697       else if (runp->mbs != NULL && runp->weights != NULL)
1698         /* This is for collation elements.  */
1699         runp->wcseqorder = wcseqact++;
1700
1701       /* Up to the next entry.  */
1702       runp = runp->next;
1703     }
1704
1705   /* Find out whether any of the `mbheads' entries is unset.  In this
1706      case we use the UNDEFINED entry.  */
1707   for (i = 1; i < 256; ++i)
1708     if (collate->mbheads[i] == NULL)
1709       {
1710         need_undefined = 1;
1711         collate->mbheads[i] = &collate->undefined;
1712       }
1713
1714   /* Now to the wide character case.  */
1715   collate->wcheads.p = 6;
1716   collate->wcheads.q = 10;
1717   wchead_table_init (&collate->wcheads);
1718
1719   collate->wcseqorder.p = 6;
1720   collate->wcseqorder.q = 10;
1721   collseq_table_init (&collate->wcseqorder);
1722
1723   /* Start adding.  */
1724   runp = collate->start;
1725   while (runp != NULL)
1726     {
1727       if (runp->wcs != NULL)
1728         {
1729           struct element_t *e;
1730           struct element_t **eptr;
1731           struct element_t *lastp;
1732
1733           /* Insert the collation sequence value.  */
1734           if (runp->is_character)
1735             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1736                                runp->wcseqorder);
1737
1738           /* Find the point where to insert in the list.  */
1739           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1740           eptr = &e;
1741           lastp = NULL;
1742           while (*eptr != NULL)
1743             {
1744               if ((*eptr)->nwcs < runp->nwcs)
1745                 break;
1746
1747               if ((*eptr)->nwcs == runp->nwcs)
1748                 {
1749                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1750                                    (wchar_t *) runp->wcs, runp->nwcs);
1751
1752                   if (c == 0)
1753                     {
1754                       /* This should not happen.  It means that we have
1755                          two symbols with the same byte sequence.  It is
1756                          of course an error.  */
1757                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1758                                                       (*eptr)->line,
1759                                                       _("\
1760 symbol `%s' has the same encoding as"), (*eptr)->name);
1761                                        error_at_line (0, 0, runp->file,
1762                                                       runp->line,
1763                                                       _("symbol `%s'"),
1764                                                       runp->name));
1765                       goto dont_insertwc;
1766                     }
1767                   else if (c < 0)
1768                     /* Insert it here.  */
1769                     break;
1770                 }
1771
1772               /* To the next entry.  */
1773               lastp = *eptr;
1774               eptr = &(*eptr)->wcnext;
1775             }
1776
1777           /* Set the pointers.  */
1778           runp->wcnext = *eptr;
1779           runp->wclast = lastp;
1780           if (*eptr != NULL)
1781             (*eptr)->wclast = runp;
1782           *eptr = runp;
1783           if (eptr == &e)
1784             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1785         dont_insertwc:
1786           ;
1787         }
1788
1789       /* Up to the next entry.  */
1790       runp = runp->next;
1791     }
1792
1793   collseq_table_finalize (&collate->wcseqorder);
1794
1795   /* Now determine whether the UNDEFINED entry is needed and if yes,
1796      whether it was defined.  */
1797   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1798   if (collate->undefined.file == NULL)
1799     {
1800       if (need_undefined)
1801         {
1802           /* This seems not to be enforced by recent standards.  Don't
1803              emit an error, simply append UNDEFINED at the end.  */
1804           if (0)
1805             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1806
1807           /* Add UNDEFINED at the end.  */
1808           collate->undefined.mborder =
1809             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1810
1811           for (i = 0; i < nrules; ++i)
1812             collate->undefined.mborder[i] = mbact[i]++;
1813         }
1814
1815       /* In any case we will need the definition for the wide character
1816          case.  But we will not complain that it is missing since the
1817          specification strangely enough does not seem to account for
1818          this.  */
1819       collate->undefined.wcorder = wcact++;
1820     }
1821
1822   /* Finally, try to unify the rules for the sections.  Whenever the rules
1823      for a section are the same as those for another section give the
1824      ruleset the same index.  Since there are never many section we can
1825      use an O(n^2) algorithm here.  */
1826   sect = collate->sections;
1827   while (sect != NULL && sect->rules == NULL)
1828     sect = sect->next;
1829
1830   /* Bail out if we have no sections because of earlier errors.  */
1831   if (sect == NULL)
1832     {
1833       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1834                               _("too many errors; giving up")));
1835       return;
1836     }
1837
1838   ruleidx = 0;
1839   do
1840     {
1841       struct section_list *osect = collate->sections;
1842
1843       while (osect != sect)
1844         if (osect->rules != NULL
1845             && memcmp (osect->rules, sect->rules,
1846                        nrules * sizeof (osect->rules[0])) == 0)
1847           break;
1848         else
1849           osect = osect->next;
1850
1851       if (osect == sect)
1852         sect->ruleidx = ruleidx++;
1853       else
1854         sect->ruleidx = osect->ruleidx;
1855
1856       /* Next section.  */
1857       do
1858         sect = sect->next;
1859       while (sect != NULL && sect->rules == NULL);
1860     }
1861   while (sect != NULL);
1862   /* We are currently not prepared for more than 128 rulesets.  But this
1863      should never really be a problem.  */
1864   assert (ruleidx <= 128);
1865 }
1866
1867
1868 static int32_t
1869 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1870                struct element_t *elem)
1871 {
1872   size_t cnt;
1873   int32_t retval;
1874
1875   /* Optimize the use of UNDEFINED.  */
1876   if (elem == &collate->undefined)
1877     /* The weights are already inserted.  */
1878     return 0;
1879
1880   /* This byte can start exactly one collation element and this is
1881      a single byte.  We can directly give the index to the weights.  */
1882   retval = obstack_object_size (pool);
1883
1884   /* Construct the weight.  */
1885   for (cnt = 0; cnt < nrules; ++cnt)
1886     {
1887       char buf[elem->weights[cnt].cnt * 7];
1888       int len = 0;
1889       int i;
1890
1891       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1892         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1893         if (elem->weights[cnt].w[i] != NULL)
1894           len += utf8_encode (&buf[len],
1895                               elem->weights[cnt].w[i]->mborder[cnt]);
1896
1897       /* And add the buffer content.  */
1898       obstack_1grow (pool, len);
1899       obstack_grow (pool, buf, len);
1900     }
1901
1902   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1903 }
1904
1905
1906 static int32_t
1907 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1908                  struct element_t *elem)
1909 {
1910   size_t cnt;
1911   int32_t retval;
1912
1913   /* Optimize the use of UNDEFINED.  */
1914   if (elem == &collate->undefined)
1915     /* The weights are already inserted.  */
1916     return 0;
1917
1918   /* This byte can start exactly one collation element and this is
1919      a single byte.  We can directly give the index to the weights.  */
1920   retval = obstack_object_size (pool) / sizeof (int32_t);
1921
1922   /* Construct the weight.  */
1923   for (cnt = 0; cnt < nrules; ++cnt)
1924     {
1925       int32_t buf[elem->weights[cnt].cnt];
1926       int i;
1927       int32_t j;
1928
1929       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1930         if (elem->weights[cnt].w[i] != NULL)
1931           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1932
1933       /* And add the buffer content.  */
1934       obstack_int32_grow (pool, j);
1935
1936       obstack_grow (pool, buf, j * sizeof (int32_t));
1937     }
1938
1939   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1940 }
1941
/* If localedef is ever threaded, this would need to be a __thread
   variable.

   Context used by add_to_tablewc, which takes only the wide character
   and the list head as arguments.  collate_output fills in all fields
   right before handing add_to_tablewc to wchead_table_iterate and
   zeroes the whole structure again afterwards.  */
static struct
{
  /* Pool handed to output_weightwc; the weight records are appended
     here.  */
  struct obstack *weightpool;
  /* Pool receiving the per-character entry lists: weight or indirect
     index, sequence length and the wide characters of the sequence.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed runs of consecutive
     sequences.  */
  struct obstack *indpool;
  /* The collation data being written; passed on to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table mapping a wide character to its weight index or to the
     negated offset of its entry list in EXTRAPOOL.  */
  struct collidx_table *tablewc;
} atwc;
1951
1952 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1953
1954 static void
1955 add_to_tablewc (uint32_t ch, struct element_t *runp)
1956 {
1957   if (runp->wcnext == NULL && runp->nwcs == 1)
1958     {
1959       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1960                                            runp);
1961       collidx_table_add (atwc.tablewc, ch, weigthidx);
1962     }
1963   else
1964     {
1965       /* As for the singlebyte table, we recognize sequences and
1966          compress them.  */
1967       struct element_t *lastp;
1968
1969       collidx_table_add (atwc.tablewc, ch,
1970                          -(obstack_object_size (atwc.extrapool)
1971                          / sizeof (uint32_t)));
1972
1973       do
1974         {
1975           /* Store the current index in the weight table.  We know that
1976              the current position in the `extrapool' is aligned on a
1977              32-bit address.  */
1978           int32_t weightidx;
1979           int added;
1980
1981           /* Find out wether this is a single entry or we have more than
1982              one consecutive entry.  */
1983           if (runp->wcnext != NULL
1984               && runp->nwcs == runp->wcnext->nwcs
1985               && wmemcmp ((wchar_t *) runp->wcs,
1986                           (wchar_t *)runp->wcnext->wcs,
1987                           runp->nwcs - 1) == 0
1988               && (runp->wcs[runp->nwcs - 1]
1989                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
1990             {
1991               int i;
1992               struct element_t *series_startp = runp;
1993               struct element_t *curp;
1994
1995               /* Now add first the initial byte sequence.  */
1996               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
1997               if (sizeof (int32_t) == sizeof (int))
1998                 obstack_make_room (atwc.extrapool, added);
1999
2000               /* More than one consecutive entry.  We mark this by having
2001                  a negative index into the indirect table.  */
2002               obstack_int32_grow_fast (atwc.extrapool,
2003                                        -(obstack_object_size (atwc.indpool)
2004                                          / sizeof (int32_t)));
2005               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2006
2007               do
2008                 runp = runp->wcnext;
2009               while (runp->wcnext != NULL
2010                      && runp->nwcs == runp->wcnext->nwcs
2011                      && wmemcmp ((wchar_t *) runp->wcs,
2012                                  (wchar_t *)runp->wcnext->wcs,
2013                                  runp->nwcs - 1) == 0
2014                      && (runp->wcs[runp->nwcs - 1]
2015                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2016
2017               /* Now walk backward from here to the beginning.  */
2018               curp = runp;
2019
2020               for (i = 1; i < runp->nwcs; ++i)
2021                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2022
2023               /* Now find the end of the consecutive sequence and
2024                  add all the indeces in the indirect pool.  */
2025               do
2026                 {
2027                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2028                                                curp);
2029                   obstack_int32_grow (atwc.indpool, weightidx);
2030
2031                   curp = curp->wclast;
2032                 }
2033               while (curp != series_startp);
2034
2035               /* Add the final weight.  */
2036               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2037                                            curp);
2038               obstack_int32_grow (atwc.indpool, weightidx);
2039
2040               /* And add the end byte sequence.  Without length this
2041                  time.  */
2042               for (i = 1; i < curp->nwcs; ++i)
2043                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2044             }
2045           else
2046             {
2047               /* A single entry.  Simply add the index and the length and
2048                  string (except for the first character which is already
2049                  tested for).  */
2050               int i;
2051
2052               /* Output the weight info.  */
2053               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2054                                            runp);
2055
2056               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2057               if (sizeof (int) == sizeof (int32_t))
2058                 obstack_make_room (atwc.extrapool, added);
2059
2060               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2061               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2062               for (i = 1; i < runp->nwcs; ++i)
2063                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2064             }
2065
2066           /* Next entry.  */
2067           lastp = runp;
2068           runp = runp->wcnext;
2069         }
2070       while (runp != NULL);
2071     }
2072 }
2073
2074 void
2075 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2076                 const char *output_path)
2077 {
2078   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2079   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2080   struct iovec iov[2 + nelems];
2081   struct locale_file data;
2082   uint32_t idx[nelems];
2083   size_t cnt;
2084   size_t ch;
2085   int32_t tablemb[256];
2086   struct obstack weightpool;
2087   struct obstack extrapool;
2088   struct obstack indirectpool;
2089   struct section_list *sect;
2090   struct collidx_table tablewc;
2091   uint32_t elem_size;
2092   uint32_t *elem_table;
2093   int i;
2094   struct element_t *runp;
2095
2096   data.magic = LIMAGIC (LC_COLLATE);
2097   data.n = nelems;
2098   iov[0].iov_base = (void *) &data;
2099   iov[0].iov_len = sizeof (data);
2100
2101   iov[1].iov_base = (void *) idx;
2102   iov[1].iov_len = sizeof (idx);
2103
2104   idx[0] = iov[0].iov_len + iov[1].iov_len;
2105   cnt = 0;
2106
2107   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2108   iov[2 + cnt].iov_base = &nrules;
2109   iov[2 + cnt].iov_len = sizeof (uint32_t);
2110   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2111   ++cnt;
2112
2113   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2114   if (collate == NULL)
2115     {
2116       int32_t dummy = 0;
2117
2118       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2119         {
2120           /* The words have to be handled specially.  */
2121           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2122             {
2123               iov[2 + cnt].iov_base = &dummy;
2124               iov[2 + cnt].iov_len = sizeof (int32_t);
2125             }
2126           else
2127             {
2128               iov[2 + cnt].iov_base = NULL;
2129               iov[2 + cnt].iov_len = 0;
2130             }
2131
2132           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2133             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2134           ++cnt;
2135         }
2136
2137       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2138
2139       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2140
2141       return;
2142     }
2143
2144   obstack_init (&weightpool);
2145   obstack_init (&extrapool);
2146   obstack_init (&indirectpool);
2147
2148   /* Since we are using the sign of an integer to mark indirection the
2149      offsets in the arrays we are indirectly referring to must not be
2150      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2151   obstack_int32_grow (&extrapool, 0);
2152   obstack_int32_grow (&indirectpool, 0);
2153
2154   /* Prepare the ruleset table.  */
2155   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2156     if (sect->rules != NULL && sect->ruleidx == i)
2157       {
2158         int j;
2159
2160         obstack_make_room (&weightpool, nrules);
2161
2162         for (j = 0; j < nrules; ++j)
2163           obstack_1grow_fast (&weightpool, sect->rules[j]);
2164         ++i;
2165       }
2166   /* And align the output.  */
2167   i = (nrules * i) % __alignof__ (int32_t);
2168   if (i > 0)
2169     do
2170       obstack_1grow (&weightpool, '\0');
2171     while (++i < __alignof__ (int32_t));
2172
2173   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2174   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2175   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2176   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2177   ++cnt;
2178
2179   /* Generate the 8-bit table.  Walk through the lists of sequences
2180      starting with the same byte and add them one after the other to
2181      the table.  In case we have more than one sequence starting with
2182      the same byte we have to use extra indirection.
2183
2184      First add a record for the NUL byte.  This entry will never be used
2185      so it does not matter.  */
2186   tablemb[0] = 0;
2187
2188   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2189      will probably be used more than once it is good to store the
2190      weights only once.  */
2191   if (collate->undefined.used_in_level != 0)
2192     output_weight (&weightpool, collate, &collate->undefined);
2193
2194   for (ch = 1; ch < 256; ++ch)
2195     if (collate->mbheads[ch]->mbnext == NULL
2196         && collate->mbheads[ch]->nmbs <= 1)
2197       {
2198         tablemb[ch] = output_weight (&weightpool, collate,
2199                                      collate->mbheads[ch]);
2200       }
2201     else
2202       {
2203         /* The entries in the list are sorted by length and then
2204            alphabetically.  This is the order in which we will add the
2205            elements to the collation table.  This allows simply walking
2206            the table in sequence and stopping at the first matching
2207            entry.  Since the longer sequences are coming first in the
2208            list they have the possibility to match first, just as it
2209            has to be.  In the worst case we are walking to the end of
2210            the list where we put, if no singlebyte sequence is defined
2211            in the locale definition, the weights for UNDEFINED.
2212
2213            To reduce the length of the search list we compress them a bit.
2214            This happens by collecting sequences of consecutive byte
2215            sequences in one entry (having and begin and end byte sequence)
2216            and add only one index into the weight table.  We can find the
2217            consecutive entries since they are also consecutive in the list.  */
2218         struct element_t *runp = collate->mbheads[ch];
2219         struct element_t *lastp;
2220
2221         assert ((obstack_object_size (&extrapool)
2222                  & (__alignof__ (int32_t) - 1)) == 0);
2223
2224         tablemb[ch] = -obstack_object_size (&extrapool);
2225
2226         do
2227           {
2228             /* Store the current index in the weight table.  We know that
2229                the current position in the `extrapool' is aligned on a
2230                32-bit address.  */
2231             int32_t weightidx;
2232             int added;
2233
2234             /* Find out wether this is a single entry or we have more than
2235                one consecutive entry.  */
2236             if (runp->mbnext != NULL
2237                 && runp->nmbs == runp->mbnext->nmbs
2238                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2239                 && (runp->mbs[runp->nmbs - 1]
2240                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2241               {
2242                 int i;
2243                 struct element_t *series_startp = runp;
2244                 struct element_t *curp;
2245
2246                 /* Compute how much space we will need.  */
2247                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2248                           + __alignof__ (int32_t) - 1)
2249                          & ~(__alignof__ (int32_t) - 1));
2250                 assert ((obstack_object_size (&extrapool)
2251                          & (__alignof__ (int32_t) - 1)) == 0);
2252                 obstack_make_room (&extrapool, added);
2253
2254                 /* More than one consecutive entry.  We mark this by having
2255                    a negative index into the indirect table.  */
2256                 obstack_int32_grow_fast (&extrapool,
2257                                          -(obstack_object_size (&indirectpool)
2258                                            / sizeof (int32_t)));
2259
2260                 /* Now search first the end of the series.  */
2261                 do
2262                   runp = runp->mbnext;
2263                 while (runp->mbnext != NULL
2264                        && runp->nmbs == runp->mbnext->nmbs
2265                        && memcmp (runp->mbs, runp->mbnext->mbs,
2266                                   runp->nmbs - 1) == 0
2267                        && (runp->mbs[runp->nmbs - 1]
2268                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2269
2270                 /* Now walk backward from here to the beginning.  */
2271                 curp = runp;
2272
2273                 assert (runp->nmbs <= 256);
2274                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2275                 for (i = 1; i < curp->nmbs; ++i)
2276                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2277
2278                 /* Now find the end of the consecutive sequence and
2279                    add all the indeces in the indirect pool.  */
2280                 do
2281                   {
2282                     weightidx = output_weight (&weightpool, collate, curp);
2283                     obstack_int32_grow (&indirectpool, weightidx);
2284
2285                     curp = curp->mblast;
2286                   }
2287                 while (curp != series_startp);
2288
2289                 /* Add the final weight.  */
2290                 weightidx = output_weight (&weightpool, collate, curp);
2291                 obstack_int32_grow (&indirectpool, weightidx);
2292
2293                 /* And add the end byte sequence.  Without length this
2294                    time.  */
2295                 for (i = 1; i < curp->nmbs; ++i)
2296                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2297               }
2298             else
2299               {
2300                 /* A single entry.  Simply add the index and the length and
2301                    string (except for the first character which is already
2302                    tested for).  */
2303                 int i;
2304
2305                 /* Output the weight info.  */
2306                 weightidx = output_weight (&weightpool, collate, runp);
2307
2308                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2309                           + __alignof__ (int32_t) - 1)
2310                          & ~(__alignof__ (int32_t) - 1));
2311                 assert ((obstack_object_size (&extrapool)
2312                          & (__alignof__ (int32_t) - 1)) == 0);
2313                 obstack_make_room (&extrapool, added);
2314
2315                 obstack_int32_grow_fast (&extrapool, weightidx);
2316                 assert (runp->nmbs <= 256);
2317                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2318
2319                 for (i = 1; i < runp->nmbs; ++i)
2320                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2321               }
2322
2323             /* Add alignment bytes if necessary.  */
2324             while ((obstack_object_size (&extrapool)
2325                     & (__alignof__ (int32_t) - 1)) != 0)
2326               obstack_1grow_fast (&extrapool, '\0');
2327
2328             /* Next entry.  */
2329             lastp = runp;
2330             runp = runp->mbnext;
2331           }
2332         while (runp != NULL);
2333
2334         assert ((obstack_object_size (&extrapool)
2335                  & (__alignof__ (int32_t) - 1)) == 0);
2336
2337         /* If the final entry in the list is not a single character we
2338            add an UNDEFINED entry here.  */
2339         if (lastp->nmbs != 1)
2340           {
2341             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2342                          & ~(__alignof__ (int32_t) - 1));
2343             obstack_make_room (&extrapool, added);
2344
2345             obstack_int32_grow_fast (&extrapool, 0);
2346             /* XXX What rule? We just pick the first.  */
2347             obstack_1grow_fast (&extrapool, 0);
2348             /* Length is zero.  */
2349             obstack_1grow_fast (&extrapool, 0);
2350
2351             /* Add alignment bytes if necessary.  */
2352             while ((obstack_object_size (&extrapool)
2353                     & (__alignof__ (int32_t) - 1)) != 0)
2354               obstack_1grow_fast (&extrapool, '\0');
2355           }
2356       }
2357
2358   /* Add padding to the tables if necessary.  */
2359   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2360          != 0)
2361     obstack_1grow (&weightpool, 0);
2362
2363   /* Now add the four tables.  */
2364   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2365   iov[2 + cnt].iov_base = tablemb;
2366   iov[2 + cnt].iov_len = sizeof (tablemb);
2367   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2368   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2369   ++cnt;
2370
2371   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2372   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2373   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2374   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2375   ++cnt;
2376
2377   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2378   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2379   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2380   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2381   ++cnt;
2382
2383   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2384   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2385   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2386   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2387   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2388   ++cnt;
2389
2390
2391   /* Now the same for the wide character table.  We need to store some
2392      more information here.  */
2393   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2394   iov[2 + cnt].iov_base = NULL;
2395   iov[2 + cnt].iov_len = 0;
2396   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2397   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2398   ++cnt;
2399
2400   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2401   iov[2 + cnt].iov_base = NULL;
2402   iov[2 + cnt].iov_len = 0;
2403   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2404   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2405   ++cnt;
2406
2407   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2408   iov[2 + cnt].iov_base = NULL;
2409   iov[2 + cnt].iov_len = 0;
2410   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2411   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2412   ++cnt;
2413
2414   /* Since we are using the sign of an integer to mark indirection the
2415      offsets in the arrays we are indirectly referring to must not be
2416      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2417   obstack_int32_grow (&extrapool, 0);
2418   obstack_int32_grow (&indirectpool, 0);
2419
2420   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2421      will probably be used more than once it is good to store the
2422      weights only once.  */
2423   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2424     abort ();
2425
2426   /* Generate the table.  Walk through the lists of sequences starting
2427      with the same wide character and add them one after the other to
2428      the table.  In case we have more than one sequence starting with
2429      the same byte we have to use extra indirection.  */
2430   tablewc.p = 6;
2431   tablewc.q = 10;
2432   collidx_table_init (&tablewc);
2433
2434   atwc.weightpool = &weightpool;
2435   atwc.extrapool = &extrapool;
2436   atwc.indpool = &indirectpool;
2437   atwc.collate = collate;
2438   atwc.tablewc = &tablewc;
2439
2440   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2441
2442   memset (&atwc, 0, sizeof (atwc));
2443
2444   collidx_table_finalize (&tablewc);
2445
2446   /* Now add the four tables.  */
2447   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2448   iov[2 + cnt].iov_base = tablewc.result;
2449   iov[2 + cnt].iov_len = tablewc.result_size;
2450   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2451   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2452   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2453   ++cnt;
2454
2455   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2456   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2457   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2458   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2459   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2460   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2461   ++cnt;
2462
2463   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2464   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2465   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2466   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2467   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2468   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2469   ++cnt;
2470
2471   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2472   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2473   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2474   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2475   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2476   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2477   ++cnt;
2478
2479
2480   /* Finally write the table with collation element names out.  It is
2481      a hash table with a simple function which gets the name of the
2482      character as the input.  One character might have many names.  The
2483      value associated with the name is an index into the weight table
2484      where we are then interested in the first-level weight value.
2485
2486      To determine how large the table should be we are counting the
2487      elements have to put in.  Since we are using internal chaining
2488      using a secondary hash function we have to make the table a bit
2489      larger to avoid extremely long search times.  We can achieve
2490      good results with a 40% larger table than there are entries.  */
2491   elem_size = 0;
2492   runp = collate->start;
2493   while (runp != NULL)
2494     {
2495       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2496         /* Yep, the element really counts.  */
2497         ++elem_size;
2498
2499       runp = runp->next;
2500     }
2501   /* Add 40% and find the next prime number.  */
2502   elem_size = next_prime (elem_size * 1.4);
2503
2504   /* Allocate the table.  Each entry consists of two words: the hash
2505      value and an index in a secondary table which provides the index
2506      into the weight table and the string itself (so that a match can
2507      be determined).  */
2508   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2509                                            elem_size * 2 * sizeof (uint32_t));
2510   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2511
2512   /* Now add the elements.  */
2513   runp = collate->start;
2514   while (runp != NULL)
2515     {
2516       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2517         {
2518           /* Compute the hash value of the name.  */
2519           uint32_t namelen = strlen (runp->name);
2520           uint32_t hash = elem_hash (runp->name, namelen);
2521           size_t idx = hash % elem_size;
2522 #ifndef NDEBUG
2523           size_t start_idx = idx;
2524 #endif
2525
2526           if (elem_table[idx * 2] != 0)
2527             {
2528               /* The spot is already taken.  Try iterating using the value
2529                  from the secondary hashing function.  */
2530               size_t iter = hash % (elem_size - 2) + 1;
2531
2532               do
2533                 {
2534                   idx += iter;
2535                   if (idx >= elem_size)
2536                     idx -= elem_size;
2537                   assert (idx != start_idx);
2538                 }
2539               while (elem_table[idx * 2] != 0);
2540             }
2541           /* This is the spot where we will insert the value.  */
2542           elem_table[idx * 2] = hash;
2543           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2544
2545           /* Then the string itself including length.  */
2546           obstack_1grow (&extrapool, namelen);
2547           obstack_grow (&extrapool, runp->name, namelen);
2548
2549           /* And the multibyte representation.  */
2550           obstack_1grow (&extrapool, runp->nmbs);
2551           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2552
2553           /* And align again to 32 bits.  */
2554           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2555             obstack_grow (&extrapool, "\0\0",
2556                           (sizeof (int32_t)
2557                            - ((1 + namelen + 1 + runp->nmbs)
2558                               % sizeof (int32_t))));
2559
2560           /* Now some 32-bit values: multibyte collation sequence,
2561              wide char string (including length), and wide char
2562              collation sequence.  */
2563           obstack_int32_grow (&extrapool, runp->mbseqorder);
2564
2565           obstack_int32_grow (&extrapool, runp->nwcs);
2566           obstack_grow (&extrapool, runp->wcs,
2567                         runp->nwcs * sizeof (uint32_t));
2568
2569           obstack_int32_grow (&extrapool, runp->wcseqorder);
2570         }
2571
2572       runp = runp->next;
2573     }
2574
2575   /* Prepare to write out this data.  */
2576   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2577   iov[2 + cnt].iov_base = &elem_size;
2578   iov[2 + cnt].iov_len = sizeof (int32_t);
2579   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2580   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2581   ++cnt;
2582
2583   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2584   iov[2 + cnt].iov_base = elem_table;
2585   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2586   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2587   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2588   ++cnt;
2589
2590   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2591   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2592   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2593   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2594   ++cnt;
2595
2596   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2597   iov[2 + cnt].iov_base = collate->mbseqorder;
2598   iov[2 + cnt].iov_len = 256;
2599   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2600   ++cnt;
2601
2602   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2603   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2604   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2605   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2606   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2607   ++cnt;
2608
2609   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2610   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2611   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2612   ++cnt;
2613
2614   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2615
2616   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2617
2618   obstack_free (&weightpool, NULL);
2619   obstack_free (&extrapool, NULL);
2620   obstack_free (&indirectpool, NULL);
2621 }
2622
2623
2624 void
2625 collate_read (struct linereader *ldfile, struct localedef_t *result,
2626               const struct charmap_t *charmap, const char *repertoire_name,
2627               int ignore_content)
2628 {
2629   struct repertoire_t *repertoire = NULL;
2630   struct locale_collate_t *collate;
2631   struct token *now;
2632   struct token *arg = NULL;
2633   enum token_t nowtok;
2634   enum token_t was_ellipsis = tok_none;
2635   struct localedef_t *copy_locale = NULL;
2636   /* Parsing state:
2637      0 - start
2638      1 - between `order-start' and `order-end'
2639      2 - after `order-end'
2640      3 - after `reorder-after', waiting for `reorder-end'
2641      4 - after `reorder-end'
2642      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2643      6 - after `reorder-sections-end'
2644   */
2645   int state = 0;
2646
2647   /* Get the repertoire we have to use.  */
2648   if (repertoire_name != NULL)
2649     repertoire = repertoire_read (repertoire_name);
2650
2651   /* The rest of the line containing `LC_COLLATE' must be free.  */
2652   lr_ignore_rest (ldfile, 1);
2653
2654   do
2655     {
2656       now = lr_token (ldfile, charmap, result, NULL, verbose);
2657       nowtok = now->tok;
2658     }
2659   while (nowtok == tok_eol);
2660
2661   if (nowtok == tok_copy)
2662     {
2663       state = 2;
2664       now = lr_token (ldfile, charmap, result, NULL, verbose);
2665       if (now->tok != tok_string)
2666         {
2667           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2668
2669         skip_category:
2670           do
2671             now = lr_token (ldfile, charmap, result, NULL, verbose);
2672           while (now->tok != tok_eof && now->tok != tok_end);
2673
2674           if (now->tok != tok_eof
2675               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2676                   now->tok == tok_eof))
2677             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2678           else if (now->tok != tok_lc_collate)
2679             {
2680               lr_error (ldfile, _("\
2681 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2682               lr_ignore_rest (ldfile, 0);
2683             }
2684           else
2685             lr_ignore_rest (ldfile, 1);
2686
2687           return;
2688         }
2689
2690       if (! ignore_content)
2691         {
2692           /* Get the locale definition.  */
2693           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2694                                      repertoire_name, charmap, NULL);
2695           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2696             {
2697               /* Not yet loaded.  So do it now.  */
2698               if (locfile_read (copy_locale, charmap) != 0)
2699                 goto skip_category;
2700             }
2701
2702           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2703             return;
2704         }
2705
2706       lr_ignore_rest (ldfile, 1);
2707
2708       now = lr_token (ldfile, charmap, result, NULL, verbose);
2709       nowtok = now->tok;
2710     }
2711
2712   /* Prepare the data structures.  */
2713   collate_startup (ldfile, result, copy_locale, ignore_content);
2714   collate = result->categories[LC_COLLATE].collate;
2715
2716   while (1)
2717     {
2718       char ucs4buf[10];
2719       char *symstr;
2720       size_t symlen;
2721
2722       /* Of course we don't proceed beyond the end of file.  */
2723       if (nowtok == tok_eof)
2724         break;
2725
2726       /* Ignore empty lines.  */
2727       if (nowtok == tok_eol)
2728         {
2729           now = lr_token (ldfile, charmap, result, NULL, verbose);
2730           nowtok = now->tok;
2731           continue;
2732         }
2733
2734       switch (nowtok)
2735         {
2736         case tok_copy:
2737           /* Allow copying other locales.  */
2738           now = lr_token (ldfile, charmap, result, NULL, verbose);
2739           if (now->tok != tok_string)
2740             goto err_label;
2741
2742           if (! ignore_content)
2743             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2744                          charmap, result);
2745
2746           lr_ignore_rest (ldfile, 1);
2747           break;
2748
2749         case tok_coll_weight_max:
2750           /* Ignore the rest of the line if we don't need the input of
2751              this line.  */
2752           if (ignore_content)
2753             {
2754               lr_ignore_rest (ldfile, 0);
2755               break;
2756             }
2757
2758           if (state != 0)
2759             goto err_label;
2760
2761           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2762           if (arg->tok != tok_number)
2763             goto err_label;
2764           if (collate->col_weight_max != -1)
2765             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2766                       "LC_COLLATE", "col_weight_max");
2767           else
2768             collate->col_weight_max = arg->val.num;
2769           lr_ignore_rest (ldfile, 1);
2770           break;
2771
2772         case tok_section_symbol:
2773           /* Ignore the rest of the line if we don't need the input of
2774              this line.  */
2775           if (ignore_content)
2776             {
2777               lr_ignore_rest (ldfile, 0);
2778               break;
2779             }
2780
2781           if (state != 0)
2782             goto err_label;
2783
2784           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2785           if (arg->tok != tok_bsymbol)
2786             goto err_label;
2787           else if (!ignore_content)
2788             {
2789               /* Check whether this section is already known.  */
2790               struct section_list *known = collate->sections;
2791               while (known != NULL)
2792                 {
2793                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2794                     break;
2795                   known = known->next;
2796                 }
2797
2798               if (known != NULL)
2799                 {
2800                   lr_error (ldfile,
2801                             _("%s: duplicate declaration of section `%s'"),
2802                             "LC_COLLATE", arg->val.str.startmb);
2803                   free (arg->val.str.startmb);
2804                 }
2805               else
2806                 collate->sections = make_seclist_elem (collate,
2807                                                        arg->val.str.startmb,
2808                                                        collate->sections);
2809
2810               lr_ignore_rest (ldfile, known == NULL);
2811             }
2812           else
2813             {
2814               free (arg->val.str.startmb);
2815               lr_ignore_rest (ldfile, 0);
2816             }
2817           break;
2818
2819         case tok_collating_element:
2820           /* Ignore the rest of the line if we don't need the input of
2821              this line.  */
2822           if (ignore_content)
2823             {
2824               lr_ignore_rest (ldfile, 0);
2825               break;
2826             }
2827
2828           if (state != 0 && state != 2)
2829             goto err_label;
2830
2831           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2832           if (arg->tok != tok_bsymbol)
2833             goto err_label;
2834           else
2835             {
2836               const char *symbol = arg->val.str.startmb;
2837               size_t symbol_len = arg->val.str.lenmb;
2838
2839               /* Next the `from' keyword.  */
2840               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2841               if (arg->tok != tok_from)
2842                 {
2843                   free ((char *) symbol);
2844                   goto err_label;
2845                 }
2846
2847               ldfile->return_widestr = 1;
2848               ldfile->translate_strings = 1;
2849
2850               /* Finally the string with the replacement.  */
2851               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2852
2853               ldfile->return_widestr = 0;
2854               ldfile->translate_strings = 0;
2855
2856               if (arg->tok != tok_string)
2857                 goto err_label;
2858
2859               if (!ignore_content && symbol != NULL)
2860                 {
2861                   /* The name is already defined.  */
2862                   if (check_duplicate (ldfile, collate, charmap,
2863                                        repertoire, symbol, symbol_len))
2864                     goto col_elem_free;
2865
2866                   if (arg->val.str.startmb != NULL)
2867                     insert_entry (&collate->elem_table, symbol, symbol_len,
2868                                   new_element (collate,
2869                                                arg->val.str.startmb,
2870                                                arg->val.str.lenmb - 1,
2871                                                arg->val.str.startwc,
2872                                                symbol, symbol_len, 0));
2873                 }
2874               else
2875                 {
2876                 col_elem_free:
2877                   if (symbol != NULL)
2878                     free ((char *) symbol);
2879                   if (arg->val.str.startmb != NULL)
2880                     free (arg->val.str.startmb);
2881                   if (arg->val.str.startwc != NULL)
2882                     free (arg->val.str.startwc);
2883                 }
2884               lr_ignore_rest (ldfile, 1);
2885             }
2886           break;
2887
2888         case tok_collating_symbol:
2889           /* Ignore the rest of the line if we don't need the input of
2890              this line.  */
2891           if (ignore_content)
2892             {
2893               lr_ignore_rest (ldfile, 0);
2894               break;
2895             }
2896
2897           if (state != 0 && state != 2)
2898             goto err_label;
2899
2900           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2901           if (arg->tok != tok_bsymbol)
2902             goto err_label;
2903           else
2904             {
2905               char *symbol = arg->val.str.startmb;
2906               size_t symbol_len = arg->val.str.lenmb;
2907               char *endsymbol = NULL;
2908               size_t endsymbol_len = 0;
2909               enum token_t ellipsis = tok_none;
2910
2911               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2912               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2913                 {
2914                   ellipsis = arg->tok;
2915
2916                   arg = lr_token (ldfile, charmap, result, repertoire,
2917                                   verbose);
2918                   if (arg->tok != tok_bsymbol)
2919                     {
2920                       free (symbol);
2921                       goto err_label;
2922                     }
2923
2924                   endsymbol = arg->val.str.startmb;
2925                   endsymbol_len = arg->val.str.lenmb;
2926
2927                   lr_ignore_rest (ldfile, 1);
2928                 }
2929               else if (arg->tok != tok_eol)
2930                 {
2931                   free (symbol);
2932                   goto err_label;
2933                 }
2934
2935               if (!ignore_content)
2936                 {
2937                   if (symbol == NULL
2938                       || (ellipsis != tok_none && endsymbol == NULL))
2939                     {
2940                       lr_error (ldfile, _("\
2941 %s: unknown character in collating symbol name"),
2942                                 "LC_COLLATE");
2943                       goto col_sym_free;
2944                     }
2945                   else if (ellipsis == tok_none)
2946                     {
2947                       /* A single symbol, no ellipsis.  */
2948                       if (check_duplicate (ldfile, collate, charmap,
2949                                            repertoire, symbol, symbol_len))
2950                         /* The name is already defined.  */
2951                         goto col_sym_free;
2952
2953                       insert_entry (&collate->sym_table, symbol, symbol_len,
2954                                     new_symbol (collate, symbol, symbol_len));
2955                     }
2956                   else if (symbol_len != endsymbol_len)
2957                     {
2958                     col_sym_inv_range:
2959                       lr_error (ldfile,
2960                                 _("invalid names for character range"));
2961                       goto col_sym_free;
2962                     }
2963                   else
2964                     {
2965                       /* Oh my, we have to handle an ellipsis.  First, as
2966                          usual, determine the common prefix and then
2967                          convert the rest into a range.  */
2968                       size_t prefixlen;
2969                       unsigned long int from;
2970                       unsigned long int to;
2971                       char *endp;
2972
2973                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2974                         if (symbol[prefixlen] != endsymbol[prefixlen])
2975                           break;
2976
2977                       /* Convert the rest into numbers.  */
2978                       symbol[symbol_len] = '\0';
2979                       from = strtoul (&symbol[prefixlen], &endp,
2980                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2981                       if (*endp != '\0')
2982                         goto col_sym_inv_range;
2983
2984                       endsymbol[symbol_len] = '\0';
2985                       to = strtoul (&endsymbol[prefixlen], &endp,
2986                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2987                       if (*endp != '\0')
2988                         goto col_sym_inv_range;
2989
2990                       if (from > to)
2991                         goto col_sym_inv_range;
2992
2993                       /* Now loop over all entries.  */
2994                       while (from <= to)
2995                         {
2996                           char *symbuf;
2997
2998                           symbuf = (char *) obstack_alloc (&collate->mempool,
2999                                                            symbol_len + 1);
3000
3001                           /* Create the name.  */
3002                           sprintf (symbuf,
3003                                    ellipsis == tok_ellipsis2
3004                                    ? "%.*s%.*lX" : "%.*s%.*lu",
3005                                    (int) prefixlen, symbol,
3006                                    (int) (symbol_len - prefixlen), from);
3007
3008                           if (check_duplicate (ldfile, collate, charmap,
3009                                                repertoire, symbuf, symbol_len))
3010                             /* The name is already defined.  */
3011                             goto col_sym_free;
3012
3013                           insert_entry (&collate->sym_table, symbuf,
3014                                         symbol_len,
3015                                         new_symbol (collate, symbuf,
3016                                                     symbol_len));
3017
3018                           /* Increment the counter.  */
3019                           ++from;
3020                         }
3021
3022                       goto col_sym_free;
3023                     }
3024                 }
3025               else
3026                 {
3027                 col_sym_free:
3028                   if (symbol != NULL)
3029                     free (symbol);
3030                   if (endsymbol != NULL)
3031                     free (endsymbol);
3032                 }
3033             }
3034           break;
3035
3036         case tok_symbol_equivalence:
3037           /* Ignore the rest of the line if we don't need the input of
3038              this line.  */
3039           if (ignore_content)
3040             {
3041               lr_ignore_rest (ldfile, 0);
3042               break;
3043             }
3044
3045           if (state != 0)
3046             goto err_label;
3047
3048           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3049           if (arg->tok != tok_bsymbol)
3050             goto err_label;
3051           else
3052             {
3053               const char *newname = arg->val.str.startmb;
3054               size_t newname_len = arg->val.str.lenmb;
3055               const char *symname;
3056               size_t symname_len;
3057               void *symval;     /* Actually struct symbol_t*  */
3058
3059               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3060               if (arg->tok != tok_bsymbol)
3061                 {
3062                   if (newname != NULL)
3063                     free ((char *) newname);
3064                   goto err_label;
3065                 }
3066
3067               symname = arg->val.str.startmb;
3068               symname_len = arg->val.str.lenmb;
3069
3070               if (newname == NULL)
3071                 {
3072                   lr_error (ldfile, _("\
3073 %s: unknown character in equivalent definition name"),
3074                             "LC_COLLATE");
3075
3076                 sym_equiv_free:
3077                   if (newname != NULL)
3078                     free ((char *) newname);
3079                   if (symname != NULL)
3080                     free ((char *) symname);
3081                   break;
3082                 }
3083               if (symname == NULL)
3084                 {
3085                   lr_error (ldfile, _("\
3086 %s: unknown character in equivalent definition value"),
3087                             "LC_COLLATE");
3088                   goto sym_equiv_free;
3089                 }
3090
3091               /* See whether the symbol name is already defined.  */
3092               if (find_entry (&collate->sym_table, symname, symname_len,
3093                               &symval) != 0)
3094                 {
3095                   lr_error (ldfile, _("\
3096 %s: unknown symbol `%s' in equivalent definition"),
3097                             "LC_COLLATE", symname);
3098                   goto sym_equiv_free;
3099                 }
3100
3101               if (insert_entry (&collate->sym_table,
3102                                 newname, newname_len, symval) < 0)
3103                 {
3104                   lr_error (ldfile, _("\
3105 error while adding equivalent collating symbol"));
3106                   goto sym_equiv_free;
3107                 }
3108
3109               free ((char *) symname);
3110             }
3111           lr_ignore_rest (ldfile, 1);
3112           break;
3113
3114         case tok_script:
3115           /* We get told about the scripts we know.  */
3116           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3117           if (arg->tok != tok_bsymbol)
3118             goto err_label;
3119           else
3120             {
3121               struct section_list *runp = collate->known_sections;
3122               char *name;
3123
3124               while (runp != NULL)
3125                 if (strncmp (runp->name, arg->val.str.startmb,
3126                              arg->val.str.lenmb) == 0
3127                     && runp->name[arg->val.str.lenmb] == '\0')
3128                   break;
3129                 else
3130                   runp = runp->def_next;
3131
3132               if (runp != NULL)
3133                 {
3134                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3135                             runp->name);
3136                   lr_ignore_rest (ldfile, 0);
3137                   break;
3138                 }
3139
3140               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3141               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3142               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3143               name[arg->val.str.lenmb] = '\0';
3144               runp->name = name;
3145
3146               runp->def_next = collate->known_sections;
3147               collate->known_sections = runp;
3148             }
3149           lr_ignore_rest (ldfile, 1);
3150           break;
3151
3152         case tok_order_start:
3153           /* Ignore the rest of the line if we don't need the input of
3154              this line.  */
3155           if (ignore_content)
3156             {
3157               lr_ignore_rest (ldfile, 0);
3158               break;
3159             }
3160
3161           if (state != 0 && state != 1 && state != 2)
3162             goto err_label;
3163           state = 1;
3164
3165           /* The 14652 draft does not specify whether all `order_start' lines
3166              must contain the same number of sort-rules, but 14651 does.  So
3167              we require this here as well.  */
3168           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3169           if (arg->tok == tok_bsymbol)
3170             {
3171               /* This had better be a section name.  */
3172               struct section_list *sp = collate->known_sections;
3173               while (sp != NULL
3174                      && (sp->name == NULL
3175                          || strncmp (sp->name, arg->val.str.startmb,
3176                                      arg->val.str.lenmb) != 0
3177                          || sp->name[arg->val.str.lenmb] != '\0'))
3178                 sp = sp->def_next;
3179
3180               if (sp == NULL)
3181                 {
3182                   lr_error (ldfile, _("\
3183 %s: unknown section name `%.*s'"),
3184                             "LC_COLLATE", (int) arg->val.str.lenmb,
3185                             arg->val.str.startmb);
3186                   /* We use the error section.  */
3187                   collate->current_section = &collate->error_section;
3188
3189                   if (collate->error_section.first == NULL)
3190                     {
3191                       /* Insert &collate->error_section at the end of
3192                          the collate->sections list.  */
3193                       if (collate->sections == NULL)
3194                         collate->sections = &collate->error_section;
3195                       else
3196                         {
3197                           sp = collate->sections;
3198                           while (sp->next != NULL)
3199                             sp = sp->next;
3200
3201                           sp->next = &collate->error_section;
3202                         }
3203                       collate->error_section.next = NULL;
3204                     }
3205                 }
3206               else
3207                 {
3208                   /* One should not be allowed to open the same
3209                      section twice.  */
3210                   if (sp->first != NULL)
3211                     lr_error (ldfile, _("\
3212 %s: multiple order definitions for section `%s'"),
3213                               "LC_COLLATE", sp->name);
3214                   else
3215                     {
3216                       /* Insert sp in the collate->sections list,
3217                          right after collate->current_section.  */
3218                       if (collate->current_section == NULL)
3219                         collate->current_section = sp;
3220                       else
3221                         {
3222                           sp->next = collate->current_section->next;
3223                           collate->current_section->next = sp;
3224                         }
3225                     }
3226
3227                   /* Next should come the end of the line or a semicolon.  */
3228                   arg = lr_token (ldfile, charmap, result, repertoire,
3229                                   verbose);
3230                   if (arg->tok == tok_eol)
3231                     {
3232                       uint32_t cnt;
3233
3234                       /* This means we have exactly one rule: `forward'.  */
3235                       if (nrules > 1)
3236                         lr_error (ldfile, _("\
3237 %s: invalid number of sorting rules"),
3238                                   "LC_COLLATE");
3239                       else
3240                         nrules = 1;
3241                       sp->rules = obstack_alloc (&collate->mempool,
3242                                                  (sizeof (enum coll_sort_rule)
3243                                                   * nrules));
3244                       for (cnt = 0; cnt < nrules; ++cnt)
3245                         sp->rules[cnt] = sort_forward;
3246
3247                       /* Next line.  */
3248                       break;
3249                     }
3250
3251                   /* Get the next token.  */
3252                   arg = lr_token (ldfile, charmap, result, repertoire,
3253                                   verbose);
3254                 }
3255             }
3256           else
3257             {
3258               /* There is no section symbol.  Therefore we use the unnamed
3259                  section.  */
3260               collate->current_section = &collate->unnamed_section;
3261
3262               if (collate->unnamed_section.first != NULL)
3263                 lr_error (ldfile, _("\
3264 %s: multiple order definitions for unnamed section"),
3265                           "LC_COLLATE");
3266               else
3267                 {
3268                   /* Insert &collate->unnamed_section at the beginning of
3269                      the collate->sections list.  */
3270                   collate->unnamed_section.next = collate->sections;
3271                   collate->sections = &collate->unnamed_section;
3272                 }
3273             }
3274
3275           /* Now read the direction names.  */
3276           read_directions (ldfile, arg, charmap, repertoire, result);
3277
3278           /* From now we need the strings untranslated.  */
3279           ldfile->translate_strings = 0;
3280           break;
3281
3282         case tok_order_end:
3283           /* Ignore the rest of the line if we don't need the input of
3284              this line.  */
3285           if (ignore_content)
3286             {
3287               lr_ignore_rest (ldfile, 0);
3288               break;
3289             }
3290
3291           if (state != 1)
3292             goto err_label;
3293
3294           /* Handle ellipsis at end of list.  */
3295           if (was_ellipsis != tok_none)
3296             {
3297               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3298                                repertoire, result);
3299               was_ellipsis = tok_none;
3300             }
3301
3302           state = 2;
3303           lr_ignore_rest (ldfile, 1);
3304           break;
3305
3306         case tok_reorder_after:
3307           /* Ignore the rest of the line if we don't need the input of
3308              this line.  */
3309           if (ignore_content)
3310             {
3311               lr_ignore_rest (ldfile, 0);
3312               break;
3313             }
3314
3315           if (state == 1)
3316             {
3317               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3318                         "LC_COLLATE");
3319               state = 2;
3320
3321               /* Handle ellipsis at end of list.  */
3322               if (was_ellipsis != tok_none)
3323                 {
3324                   handle_ellipsis (ldfile, arg->val.str.startmb,
3325                                    arg->val.str.lenmb, was_ellipsis, charmap,
3326                                    repertoire, result);
3327                   was_ellipsis = tok_none;
3328                 }
3329             }
3330           else if (state != 2 && state != 3)
3331             goto err_label;
3332           state = 3;
3333
3334           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3335           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3336             {
3337               /* Find this symbol in the sequence table.  */
3338               char ucsbuf[10];
3339               char *startmb;
3340               size_t lenmb;
3341               struct element_t *insp;
3342               int no_error = 1;
3343               void *ptr;
3344
3345               if (arg->tok == tok_bsymbol)
3346                 {
3347                   startmb = arg->val.str.startmb;
3348                   lenmb = arg->val.str.lenmb;
3349                 }
3350               else
3351                 {
3352                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3353                   startmb = ucsbuf;
3354                   lenmb = 9;
3355                 }
3356
3357               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3358                 /* Yes, the symbol exists.  Simply point the cursor
3359                    to it.  */
3360                 collate->cursor = (struct element_t *) ptr;
3361               else
3362                 {
3363                   struct symbol_t *symbp;
3364                   void *ptr;
3365
3366                   if (find_entry (&collate->sym_table, startmb, lenmb,
3367                                   &ptr) == 0)
3368                     {
3369                       symbp = ptr;
3370
3371                       if (symbp->order->last != NULL
3372                           || symbp->order->next != NULL)
3373                         collate->cursor = symbp->order;
3374                       else
3375                         {
3376                           /* This is a collating symbol but its position
3377                              is not yet defined.  */
3378                           lr_error (ldfile, _("\
3379 %s: order for collating symbol %.*s not yet defined"),
3380                                     "LC_COLLATE", (int) lenmb, startmb);
3381                           collate->cursor = NULL;
3382                           no_error = 0;
3383                         }
3384                     }
3385                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3386                                        &ptr) == 0)
3387                     {
3388                       insp = (struct element_t *) ptr;
3389
3390                       if (insp->last != NULL || insp->next != NULL)
3391                         collate->cursor = insp;
3392                       else
3393                         {
3394                           /* This is a collating element but its position
3395                              is not yet defined.  */
3396                           lr_error (ldfile, _("\
3397 %s: order for collating element %.*s not yet defined"),
3398                                     "LC_COLLATE", (int) lenmb, startmb);
3399                           collate->cursor = NULL;
3400                           no_error = 0;
3401                         }
3402                     }
3403                   else
3404                     {
3405                       /* This is bad.  The symbol after which we have to
3406                          insert does not exist.  */
3407                       lr_error (ldfile, _("\
3408 %s: cannot reorder after %.*s: symbol not known"),
3409                                 "LC_COLLATE", (int) lenmb, startmb);
3410                       collate->cursor = NULL;
3411                       no_error = 0;
3412                     }
3413                 }
3414
3415               lr_ignore_rest (ldfile, no_error);
3416             }
3417           else
3418             /* This must not happen.  */
3419             goto err_label;
3420           break;
3421
3422         case tok_reorder_end:
3423           /* Ignore the rest of the line if we don't need the input of
3424              this line.  */
3425           if (ignore_content)
3426             break;
3427
3428           if (state != 3)
3429             goto err_label;
3430           state = 4;
3431           lr_ignore_rest (ldfile, 1);
3432           break;
3433
3434         case tok_reorder_sections_after:
3435           /* Ignore the rest of the line if we don't need the input of
3436              this line.  */
3437           if (ignore_content)
3438             {
3439               lr_ignore_rest (ldfile, 0);
3440               break;
3441             }
3442
3443           if (state == 1)
3444             {
3445               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3446                         "LC_COLLATE");
3447               state = 2;
3448
3449               /* Handle ellipsis at end of list.  */
3450               if (was_ellipsis != tok_none)
3451                 {
3452                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3453                                    repertoire, result);
3454                   was_ellipsis = tok_none;
3455                 }
3456             }
3457           else if (state == 3)
3458             {
3459               WITH_CUR_LOCALE (error (0, 0, _("\
3460 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3461               state = 4;
3462             }
3463           else if (state != 2 && state != 4)
3464             goto err_label;
3465           state = 5;
3466
3467           /* Get the name of the sections we are adding after.  */
3468           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3469           if (arg->tok == tok_bsymbol)
3470             {
3471               /* Now find a section with this name.  */
3472               struct section_list *runp = collate->sections;
3473
3474               while (runp != NULL)
3475                 {
3476                   if (runp->name != NULL
3477                       && strlen (runp->name) == arg->val.str.lenmb
3478                       && memcmp (runp->name, arg->val.str.startmb,
3479                                  arg->val.str.lenmb) == 0)
3480                     break;
3481
3482                   runp = runp->next;
3483                 }
3484
3485               if (runp != NULL)
3486                 collate->current_section = runp;
3487               else
3488                 {
3489                   /* This is bad.  The section after which we have to
3490                      reorder does not exist.  Therefore we cannot
3491                      process the whole rest of this reorder
3492                      specification.  */
3493                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3494                             "LC_COLLATE", (int) arg->val.str.lenmb,
3495                             arg->val.str.startmb);
3496
3497                   do
3498                     {
3499                       lr_ignore_rest (ldfile, 0);
3500
3501                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3502                     }
3503                   while (now->tok == tok_reorder_sections_after
3504                          || now->tok == tok_reorder_sections_end
3505                          || now->tok == tok_end);
3506
3507                   /* Process the token we just saw.  */
3508                   nowtok = now->tok;
3509                   continue;
3510                 }
3511             }
3512           else
3513             /* This must not happen.  */
3514             goto err_label;
3515           break;
3516
3517         case tok_reorder_sections_end:
3518           /* Ignore the rest of the line if we don't need the input of
3519              this line.  */
3520           if (ignore_content)
3521             break;
3522
3523           if (state != 5)
3524             goto err_label;
3525           state = 6;
3526           lr_ignore_rest (ldfile, 1);
3527           break;
3528
3529         case tok_bsymbol:
3530         case tok_ucs4:
3531           /* Ignore the rest of the line if we don't need the input of
3532              this line.  */
3533           if (ignore_content)
3534             {
3535               lr_ignore_rest (ldfile, 0);
3536               break;
3537             }
3538
3539           if (state != 0 && state != 1 && state != 3 && state != 5)
3540             goto err_label;
3541
3542           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3543             goto err_label;
3544
3545           if (nowtok == tok_ucs4)
3546             {
3547               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3548               symstr = ucs4buf;
3549               symlen = 9;
3550             }
3551           else if (arg != NULL)
3552             {
3553               symstr = arg->val.str.startmb;
3554               symlen = arg->val.str.lenmb;
3555             }
3556           else
3557             {
3558               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3559                         (int) ldfile->token.val.str.lenmb,
3560                         ldfile->token.val.str.startmb);
3561               break;
3562             }
3563
3564           struct element_t *seqp;
3565           if (state == 0)
3566             {
3567               /* We are outside an `order_start' region.  This means
3568                  we must only accept definitions of values for
3569                  collation symbols since these are purely abstract
3570                  values and don't need directions associated.  */
3571               void *ptr;
3572
3573               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3574                 {
3575                   seqp = ptr;
3576
3577                   /* It's already defined.  First check whether this
3578                      is really a collating symbol.  */
3579                   if (seqp->is_character)
3580                     goto err_label;
3581
3582                   goto move_entry;
3583                 }
3584               else
3585                 {
3586                   void *result;
3587
3588                   if (find_entry (&collate->sym_table, symstr, symlen,
3589                                   &result) != 0)
3590                     /* No collating symbol, it's an error.  */
3591                     goto err_label;
3592
3593                   /* Maybe this is the first time we define a symbol
3594                      value and it is before the first actual section.  */
3595                   if (collate->sections == NULL)
3596                     collate->sections = collate->current_section =
3597                       &collate->symbol_section;
3598                 }
3599
3600               if (was_ellipsis != tok_none)
3601                 {
3602                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3603                                    charmap, repertoire, result);
3604
3605                   /* Remember that we processed the ellipsis.  */
3606                   was_ellipsis = tok_none;
3607
3608                   /* And don't add the value a second time.  */
3609                   break;
3610                 }
3611             }
3612           else if (state == 3)
3613             {
3614               /* It is possible that we already have this collation sequence.
3615                  In this case we move the entry.  */
3616               void *sym;
3617               void *ptr;
3618
3619               /* If the symbol after which we have to insert was not found
3620                  ignore all entries.  */
3621               if (collate->cursor == NULL)
3622                 {
3623                   lr_ignore_rest (ldfile, 0);
3624                   break;
3625                 }
3626
3627               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3628                 {
3629                   seqp = (struct element_t *) ptr;
3630                   goto move_entry;
3631                 }
3632
3633               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3634                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3635                 goto move_entry;
3636
3637               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3638                   && (seqp = (struct element_t *) ptr,
3639                       seqp->last != NULL || seqp->next != NULL
3640                       || (collate->start != NULL && seqp == collate->start)))
3641                 {
3642                 move_entry:
3643                   /* Remove the entry from the old position.  */
3644                   if (seqp->last == NULL)
3645                     collate->start = seqp->next;
3646                   else
3647                     seqp->last->next = seqp->next;
3648                   if (seqp->next != NULL)
3649                     seqp->next->last = seqp->last;
3650
3651                   /* We also have to check whether this entry is the
3652                      first or last of a section.  */
3653                   if (seqp->section->first == seqp)
3654                     {
3655                       if (seqp->section->first == seqp->section->last)
3656                         /* This section has no content anymore.  */
3657                         seqp->section->first = seqp->section->last = NULL;
3658                       else
3659                         seqp->section->first = seqp->next;
3660                     }
3661                   else if (seqp->section->last == seqp)
3662                     seqp->section->last = seqp->last;
3663
3664                   /* Now insert it in the new place.  */
3665                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3666                                   tok_none);
3667                   break;
3668                 }
3669
3670               /* Otherwise we just add a new entry.  */
3671             }
3672           else if (state == 5)
3673             {
3674               /* We are reordering sections.  Find the named section.  */
3675               struct section_list *runp = collate->sections;
3676               struct section_list *prevp = NULL;
3677
3678               while (runp != NULL)
3679                 {
3680                   if (runp->name != NULL
3681                       && strlen (runp->name) == symlen
3682                       && memcmp (runp->name, symstr, symlen) == 0)
3683                     break;
3684
3685                   prevp = runp;
3686                   runp = runp->next;
3687                 }
3688
3689               if (runp == NULL)
3690                 {
3691                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3692                             "LC_COLLATE", (int) symlen, symstr);
3693                   lr_ignore_rest (ldfile, 0);
3694                 }
3695               else
3696                 {
3697                   if (runp != collate->current_section)
3698                     {
3699                       /* Remove the named section from the old place and
3700                          insert it in the new one.  */
3701                       prevp->next = runp->next;
3702
3703                       runp->next = collate->current_section->next;
3704                       collate->current_section->next = runp;
3705                       collate->current_section = runp;
3706                     }
3707
3708                   /* Process the rest of the line which might change
3709                      the collation rules.  */
3710                   arg = lr_token (ldfile, charmap, result, repertoire,
3711                                   verbose);
3712                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3713                     read_directions (ldfile, arg, charmap, repertoire,
3714                                      result);
3715                 }
3716               break;
3717             }
3718           else if (was_ellipsis != tok_none)
3719             {
3720               /* Using the information in the `ellipsis_weight'
3721                  element and this and the last value we have to handle
3722                  the ellipsis now.  */
3723               assert (state == 1);
3724
3725               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3726                                repertoire, result);
3727
3728               /* Remember that we processed the ellipsis.  */
3729               was_ellipsis = tok_none;
3730
3731               /* And don't add the value a second time.  */
3732               break;
3733             }
3734
3735           /* Now insert in the new place.  */
3736           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3737           break;
3738
3739         case tok_undefined:
3740           /* Ignore the rest of the line if we don't need the input of
3741              this line.  */
3742           if (ignore_content)
3743             {
3744               lr_ignore_rest (ldfile, 0);
3745               break;
3746             }
3747
3748           if (state != 1)
3749             goto err_label;
3750
3751           if (was_ellipsis != tok_none)
3752             {
3753               lr_error (ldfile,
3754                         _("%s: cannot have `%s' as end of ellipsis range"),
3755                         "LC_COLLATE", "UNDEFINED");
3756
3757               unlink_element (collate);
3758               was_ellipsis = tok_none;
3759             }
3760
3761           /* See whether UNDEFINED already appeared somewhere.  */
3762           if (collate->undefined.next != NULL
3763               || &collate->undefined == collate->cursor)
3764             {
3765               lr_error (ldfile,
3766                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3767                         "LC_COLLATE", 9, "UNDEFINED",
3768                         collate->undefined.file,
3769                         collate->undefined.line);
3770               lr_ignore_rest (ldfile, 0);
3771             }
3772           else
3773             /* Parse the weights.  */
3774              insert_weights (ldfile, &collate->undefined, charmap,
3775                              repertoire, result, tok_none);
3776           break;
3777
3778         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3779         case tok_ellipsis3: /* absolute ellipsis */
3780         case tok_ellipsis4: /* symbolic decimal ellipsis */
3781           /* This is the symbolic (decimal or hexadecimal) or absolute
3782              ellipsis.  */
3783           if (was_ellipsis != tok_none)
3784             goto err_label;
3785
3786           if (state != 0 && state != 1 && state != 3)
3787             goto err_label;
3788
3789           was_ellipsis = nowtok;
3790
3791           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3792                           repertoire, result, nowtok);
3793           break;
3794
3795         case tok_end:
3796           /* Next we assume `LC_COLLATE'.  */
3797           if (!ignore_content)
3798             {
3799               if (state == 0)
3800                 /* We must either see a copy statement or have
3801                    ordering values.  */
3802                 lr_error (ldfile,
3803                           _("%s: empty category description not allowed"),
3804                           "LC_COLLATE");
3805               else if (state == 1)
3806                 {
3807                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3808                             "LC_COLLATE");
3809
3810                   /* Handle ellipsis at end of list.  */
3811                   if (was_ellipsis != tok_none)
3812                     {
3813                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3814                                        repertoire, result);
3815                       was_ellipsis = tok_none;
3816                     }
3817                 }
3818               else if (state == 3)
3819                 WITH_CUR_LOCALE (error (0, 0, _("\
3820 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3821               else if (state == 5)
3822                 WITH_CUR_LOCALE (error (0, 0, _("\
3823 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3824             }
3825           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3826           if (arg->tok == tok_eof)
3827             break;
3828           if (arg->tok == tok_eol)
3829             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3830           else if (arg->tok != tok_lc_collate)
3831             lr_error (ldfile, _("\
3832 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3833           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3834           return;
3835
3836         default:
3837         err_label:
3838           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3839         }
3840
3841       /* Prepare for the next round.  */
3842       now = lr_token (ldfile, charmap, result, NULL, verbose);
3843       nowtok = now->tok;
3844     }
3845
3846   /* When we come here we reached the end of the file.  */
3847   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3848 }