locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <errno.h>
  24 #include <error.h>
  25 #include <stdlib.h>
  26 #include <wchar.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
  40 #define obstack_chunk_alloc malloc
  41 #define obstack_chunk_free free
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 __attribute ((always_inline))
  55 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  56 {
  57   if (sizeof (int32_t) == sizeof (int))
  58     obstack_int_grow_fast (obstack, data);
  59   else
  60     obstack_grow (obstack, &data, sizeof (int32_t));
  61 }
  62
  63 /* Forward declaration.  */
  64 struct element_t;
  65
  66 /* Data type for list of strings.  */
  67 struct section_list
  68 {
  69   /* Successor in the known_sections list.  */
  70   struct section_list *def_next;
  71   /* Successor in the sections list.  */
  72   struct section_list *next;
  73   /* Name of the section.  */
  74   const char *name;
  75   /* First element of this section.  */
  76   struct element_t *first;
  77   /* Last element of this section.  */
  78   struct element_t *last;
  79   /* These are the rules for this section.  */
  80   enum coll_sort_rule *rules;
  81   /* Index of the rule set in the appropriate section of the output file.  */
  82   int ruleidx;
  83 };
  84
  85 struct element_t;
  86
  87 struct element_list_t
  88 {
  89   /* Number of elements.  */
  90   int cnt;
  91
  92   struct element_t **w;
  93 };
  94
  95 /* Data type for collating element.  */
  96 struct element_t
  97 {
  98   const char *name;
  99
 100   const char *mbs;
 101   size_t nmbs;
 102   const uint32_t *wcs;
 103   size_t nwcs;
 104   int *mborder;
 105   int wcorder;
 106
 107   /* The following is a bit mask which bits are set if this element is
 108      used in the appropriate level.  Interesting for the singlebyte
 109      weight computation.
 110
 111      XXX The type here restricts the number of levels to 32.  It could
 112      be changed if necessary but I doubt this is necessary.  */
 113   unsigned int used_in_level;
 114
 115   struct element_list_t *weights;
 116
 117   /* Nonzero if this is a real character definition.  */
 118   int is_character;
 119
 120   /* Order of the character in the sequence.  This information will
 121      be used in range expressions.  */
 122   int mbseqorder;
 123   int wcseqorder;
 124
 125   /* Where does the definition come from.  */
 126   const char *file;
 127   size_t line;
 128
 129   /* Which section does this belong to.  */
 130   struct section_list *section;
 131
 132   /* Predecessor and successor in the order list.  */
 133   struct element_t *last;
 134   struct element_t *next;
 135
 136   /* Next element in multibyte output list.  */
 137   struct element_t *mbnext;
 138   struct element_t *mblast;
 139
 140   /* Next element in wide character output list.  */
 141   struct element_t *wcnext;
 142   struct element_t *wclast;
 143 };
 144
 145 /* Special element value.  */
 146 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 147 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 148 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 149
 150 /* Data type for collating symbol.  */
 151 struct symbol_t
 152 {
 153   const char *name;
 154
 155   /* Point to place in the order list.  */
 156   struct element_t *order;
 157
 158   /* Where does the definition come from.  */
 159   const char *file;
 160   size_t line;
 161 };
 162
 163 /* Sparse table of struct element_t *.  */
 164 #define TABLE wchead_table
 165 #define ELEMENT struct element_t *
 166 #define DEFAULT NULL
 167 #define ITERATE
 168 #define NO_FINALIZE
 169 #include "3level.h"
 170
 171 /* Sparse table of int32_t.  */
 172 #define TABLE collidx_table
 173 #define ELEMENT int32_t
 174 #define DEFAULT 0
 175 #include "3level.h"
 176
 177 /* Sparse table of uint32_t.  */
 178 #define TABLE collseq_table
 179 #define ELEMENT uint32_t
 180 #define DEFAULT ~((uint32_t) 0)
 181 #include "3level.h"
 182
 183
 184 /* The real definition of the struct for the LC_COLLATE locale.  */
 185 struct locale_collate_t
 186 {
 187   int col_weight_max;
 188   int cur_weight_max;
 189
 190   /* List of known scripts.  */
 191   struct section_list *known_sections;
 192   /* List of used sections.  */
 193   struct section_list *sections;
 194   /* Current section using definition.  */
 195   struct section_list *current_section;
 196   /* There always can be an unnamed section.  */
 197   struct section_list unnamed_section;
 198   /* To make handling of errors easier we have another section.  */
 199   struct section_list error_section;
 200   /* Sometimes we are defining the values for collating symbols before
 201      the first actual section.  */
 202   struct section_list symbol_section;
 203
 204   /* Start of the order list.  */
 205   struct element_t *start;
 206
 207   /* The undefined element.  */
 208   struct element_t undefined;
 209
 210   /* This is the cursor for `reorder_after' insertions.  */
 211   struct element_t *cursor;
 212
 213   /* This value is used when handling ellipsis.  */
 214   struct element_t ellipsis_weight;
 215
 216   /* Known collating elements.  */
 217   hash_table elem_table;
 218
 219   /* Known collating symbols.  */
 220   hash_table sym_table;
 221
 222   /* Known collation sequences.  */
 223   hash_table seq_table;
 224
 225   struct obstack mempool;
 226
 227   /* The LC_COLLATE category is a bit special as it is sometimes possible
 228      that the definitions from more than one input file contains information.
 229      Therefore we keep all relevant input in a list.  */
 230   struct locale_collate_t *next;
 231
 232   /* Arrays with heads of the list for each of the leading bytes in
 233      the multibyte sequences.  */
 234   struct element_t *mbheads[256];
 235
 236   /* Arrays with heads of the list for each of the leading bytes in
 237      the multibyte sequences.  */
 238   struct wchead_table wcheads;
 239
 240   /* The arrays with the collation sequence order.  */
 241   unsigned char mbseqorder[256];
 242   struct collseq_table wcseqorder;
 243 };
 244
 245
 246 /* We have a few global variables which are used for reading all
 247    LC_COLLATE category descriptions in all files.  */
 248 static uint32_t nrules;
 249
 250
 251 /* We need UTF-8 encoding of numbers.  */
 252 static inline int
 253 __attribute ((always_inline))
 254 utf8_encode (char *buf, int val)
 255 {
 256   int retval;
 257
 258   if (val < 0x80)
 259     {
 260       *buf++ = (char) val;
 261       retval = 1;
 262     }
 263   else
 264     {
 265       int step;
 266
 267       for (step = 2; step < 6; ++step)
 268         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 269           break;
 270       retval = step;
 271
 272       *buf = (unsigned char) (~0xff >> step);
 273       --step;
 274       do
 275         {
 276           buf[step] = 0x80 | (val & 0x3f);
 277           val >>= 6;
 278         }
 279       while (--step > 0);
 280       *buf |= val;
 281     }
 282
 283   return retval;
 284 }
 285
 286
 287 static struct section_list *
 288 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 289                    struct section_list *next)
 290 {
 291   struct section_list *newp;
 292
 293   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 294                                                 sizeof (*newp));
 295   newp->next = next;
 296   newp->name = string;
 297   newp->first = NULL;
 298   newp->last = NULL;
 299
 300   return newp;
 301 }
 302
 303
 304 static struct element_t *
 305 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 306              const uint32_t *wcs, const char *name, size_t namelen,
 307              int is_character)
 308 {
 309   struct element_t *newp;
 310
 311   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 312                                              sizeof (*newp));
 313   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 314                                                     name, namelen);
 315   if (mbs != NULL)
 316     {
 317       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 318       newp->nmbs = mbslen;
 319     }
 320   else
 321     {
 322       newp->mbs = NULL;
 323       newp->nmbs = 0;
 324     }
 325   if (wcs != NULL)
 326     {
 327       size_t nwcs = wcslen ((wchar_t *) wcs);
 328       uint32_t zero = 0;
 329       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 330       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 331       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 332       newp->nwcs = nwcs;
 333     }
 334   else
 335     {
 336       newp->wcs = NULL;
 337       newp->nwcs = 0;
 338     }
 339   newp->mborder = NULL;
 340   newp->wcorder = 0;
 341   newp->used_in_level = 0;
 342   newp->is_character = is_character;
 343
 344   /* Will be assigned later.  XXX  */
 345   newp->mbseqorder = 0;
 346   newp->wcseqorder = 0;
 347
 348   /* Will be allocated later.  */
 349   newp->weights = NULL;
 350
 351   newp->file = NULL;
 352   newp->line = 0;
 353
 354   newp->section = collate->current_section;
 355
 356   newp->last = NULL;
 357   newp->next = NULL;
 358
 359   newp->mbnext = NULL;
 360   newp->mblast = NULL;
 361
 362   newp->wcnext = NULL;
 363   newp->wclast = NULL;
 364
 365   return newp;
 366 }
 367
 368
 369 static struct symbol_t *
 370 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 371 {
 372   struct symbol_t *newp;
 373
 374   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 375
 376   newp->name = obstack_copy0 (&collate->mempool, name, len);
 377   newp->order = NULL;
 378
 379   newp->file = NULL;
 380   newp->line = 0;
 381
 382   return newp;
 383 }
 384
 385
 386 /* Test whether this name is already defined somewhere.  */
 387 static int
 388 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 389                  const struct charmap_t *charmap,
 390                  struct repertoire_t *repertoire, const char *symbol,
 391                  size_t symbol_len)
 392 {
 393   void *ignore = NULL;
 394
 395   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 396     {
 397       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 398                 (int) symbol_len, symbol);
 399       return 1;
 400     }
 401
 402   if (repertoire != NULL
 403       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 404           == 0))
 405     {
 406       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 407                 (int) symbol_len, symbol);
 408       return 1;
 409     }
 410
 411   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 412     {
 413       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 414                 (int) symbol_len, symbol);
 415       return 1;
 416     }
 417
 418   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 419     {
 420       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 421                 (int) symbol_len, symbol);
 422       return 1;
 423     }
 424
 425   return 0;
 426 }
 427
 428
 429 /* Read the direction specification.  */
 430 static void
 431 read_directions (struct linereader *ldfile, struct token *arg,
 432                  const struct charmap_t *charmap,
 433                  struct repertoire_t *repertoire, struct localedef_t *result)
 434 {
 435   int cnt = 0;
 436   int max = nrules ?: 10;
 437   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 438   int warned = 0;
 439   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 440
 441   while (1)
 442     {
 443       int valid = 0;
 444
 445       if (arg->tok == tok_forward)
 446         {
 447           if (rules[cnt] & sort_backward)
 448             {
 449               if (! warned)
 450                 {
 451                   lr_error (ldfile, _("\
 452 %s: `forward' and `backward' are mutually excluding each other"),
 453                             "LC_COLLATE");
 454                   warned = 1;
 455                 }
 456             }
 457           else if (rules[cnt] & sort_forward)
 458             {
 459               if (! warned)
 460                 {
 461                   lr_error (ldfile, _("\
 462 %s: `%s' mentioned more than once in definition of weight %d"),
 463                             "LC_COLLATE", "forward", cnt + 1);
 464                 }
 465             }
 466           else
 467             rules[cnt] |= sort_forward;
 468
 469           valid = 1;
 470         }
 471       else if (arg->tok == tok_backward)
 472         {
 473           if (rules[cnt] & sort_forward)
 474             {
 475               if (! warned)
 476                 {
 477                   lr_error (ldfile, _("\
 478 %s: `forward' and `backward' are mutually excluding each other"),
 479                             "LC_COLLATE");
 480                   warned = 1;
 481                 }
 482             }
 483           else if (rules[cnt] & sort_backward)
 484             {
 485               if (! warned)
 486                 {
 487                   lr_error (ldfile, _("\
 488 %s: `%s' mentioned more than once in definition of weight %d"),
 489                             "LC_COLLATE", "backward", cnt + 1);
 490                 }
 491             }
 492           else
 493             rules[cnt] |= sort_backward;
 494
 495           valid = 1;
 496         }
 497       else if (arg->tok == tok_position)
 498         {
 499           if (rules[cnt] & sort_position)
 500             {
 501               if (! warned)
 502                 {
 503                   lr_error (ldfile, _("\
 504 %s: `%s' mentioned more than once in definition of weight %d"),
 505                             "LC_COLLATE", "position", cnt + 1);
 506                 }
 507             }
 508           else
 509             rules[cnt] |= sort_position;
 510
 511           valid = 1;
 512         }
 513
 514       if (valid)
 515         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 516
 517       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 518           || arg->tok == tok_semicolon)
 519         {
 520           if (! valid && ! warned)
 521             {
 522               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 523               warned = 1;
 524             }
 525
 526           /* See whether we have to increment the counter.  */
 527           if (arg->tok != tok_comma && rules[cnt] != 0)
 528             {
 529               /* Add the default `forward' if we have seen only `position'.  */
 530               if (rules[cnt] == sort_position)
 531                 rules[cnt] = sort_position | sort_forward;
 532
 533               ++cnt;
 534             }
 535
 536           if (arg->tok == tok_eof || arg->tok == tok_eol)
 537             /* End of line or file, so we exit the loop.  */
 538             break;
 539
 540           if (nrules == 0)
 541             {
 542               /* See whether we have enough room in the array.  */
 543               if (cnt == max)
 544                 {
 545                   max += 10;
 546                   rules = (enum coll_sort_rule *) xrealloc (rules,
 547                                                             max
 548                                                             * sizeof (*rules));
 549                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 550                 }
 551             }
 552           else
 553             {
 554               if (cnt == nrules)
 555                 {
 556                   /* There must not be any more rule.  */
 557                   if (! warned)
 558                     {
 559                       lr_error (ldfile, _("\
 560 %s: too many rules; first entry only had %d"),
 561                                 "LC_COLLATE", nrules);
 562                       warned = 1;
 563                     }
 564
 565                   lr_ignore_rest (ldfile, 0);
 566                   break;
 567                 }
 568             }
 569         }
 570       else
 571         {
 572           if (! warned)
 573             {
 574               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 575               warned = 1;
 576             }
 577         }
 578
 579       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 580     }
 581
 582   if (nrules == 0)
 583     {
 584       /* Now we know how many rules we have.  */
 585       nrules = cnt;
 586       rules = (enum coll_sort_rule *) xrealloc (rules,
 587                                                 nrules * sizeof (*rules));
 588     }
 589   else
 590     {
 591       if (cnt < nrules)
 592         {
 593           /* Not enough rules in this specification.  */
 594           if (! warned)
 595             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 596
 597           do
 598             rules[cnt] = sort_forward;
 599           while (++cnt < nrules);
 600         }
 601     }
 602
 603   collate->current_section->rules = rules;
 604 }
 605
 606
 607 static struct element_t *
 608 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 609               const char *str, size_t len)
 610 {
 611   void *result = NULL;
 612
 613   /* Search for the entries among the collation sequences already define.  */
 614   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 615     {
 616       /* Nope, not define yet.  So we see whether it is a
 617          collation symbol.  */
 618       void *ptr;
 619
 620       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 621         {
 622           /* It's a collation symbol.  */
 623           struct symbol_t *sym = (struct symbol_t *) ptr;
 624           result = sym->order;
 625
 626           if (result == NULL)
 627             result = sym->order = new_element (collate, NULL, 0, NULL,
 628                                                NULL, 0, 0);
 629         }
 630       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 631         {
 632           /* It's also no collation element.  So it is a character
 633              element defined later.  */
 634           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 635           /* Insert it into the sequence table.  */
 636           insert_entry (&collate->seq_table, str, len, result);
 637         }
 638     }
 639
 640   return (struct element_t *) result;
 641 }
 642
 643
 644 static void
 645 unlink_element (struct locale_collate_t *collate)
 646 {
 647   if (collate->cursor == collate->start)
 648     {
 649       assert (collate->cursor->next == NULL);
 650       assert (collate->cursor->last == NULL);
 651       collate->cursor = NULL;
 652     }
 653   else
 654     {
 655       if (collate->cursor->next != NULL)
 656         collate->cursor->next->last = collate->cursor->last;
 657       if (collate->cursor->last != NULL)
 658         collate->cursor->last->next = collate->cursor->next;
 659       collate->cursor = collate->cursor->last;
 660     }
 661 }
 662
 663
 664 static void
 665 insert_weights (struct linereader *ldfile, struct element_t *elem,
 666                 const struct charmap_t *charmap,
 667                 struct repertoire_t *repertoire, struct localedef_t *result,
 668                 enum token_t ellipsis)
 669 {
 670   int weight_cnt;
 671   struct token *arg;
 672   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 673
 674   /* Initialize all the fields.  */
 675   elem->file = ldfile->fname;
 676   elem->line = ldfile->lineno;
 677
 678   elem->last = collate->cursor;
 679   elem->next = collate->cursor ? collate->cursor->next : NULL;
 680   if (collate->cursor != NULL && collate->cursor->next != NULL)
 681     collate->cursor->next->last = elem;
 682   if (collate->cursor != NULL)
 683     collate->cursor->next = elem;
 684   if (collate->start == NULL)
 685     {
 686       assert (collate->cursor == NULL);
 687       collate->start = elem;
 688     }
 689
 690   elem->section = collate->current_section;
 691
 692   if (collate->current_section->first == NULL)
 693     collate->current_section->first = elem;
 694   if (collate->current_section->last == collate->cursor)
 695     collate->current_section->last = elem;
 696
 697   collate->cursor = elem;
 698
 699   elem->weights = (struct element_list_t *)
 700     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 701   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 702
 703   weight_cnt = 0;
 704
 705   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 706   do
 707     {
 708       if (arg->tok == tok_eof || arg->tok == tok_eol)
 709         break;
 710
 711       if (arg->tok == tok_ignore)
 712         {
 713           /* The weight for this level has to be ignored.  We use the
 714              null pointer to indicate this.  */
 715           elem->weights[weight_cnt].w = (struct element_t **)
 716             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 717           elem->weights[weight_cnt].w[0] = NULL;
 718           elem->weights[weight_cnt].cnt = 1;
 719         }
 720       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 721         {
 722           char ucs4str[10];
 723           struct element_t *val;
 724           char *symstr;
 725           size_t symlen;
 726
 727           if (arg->tok == tok_bsymbol)
 728             {
 729               symstr = arg->val.str.startmb;
 730               symlen = arg->val.str.lenmb;
 731             }
 732           else
 733             {
 734               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 735               symstr = ucs4str;
 736               symlen = 9;
 737             }
 738
 739           val = find_element (ldfile, collate, symstr, symlen);
 740           if (val == NULL)
 741             break;
 742
 743           elem->weights[weight_cnt].w = (struct element_t **)
 744             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 745           elem->weights[weight_cnt].w[0] = val;
 746           elem->weights[weight_cnt].cnt = 1;
 747         }
 748       else if (arg->tok == tok_string)
 749         {
 750           /* Split the string up in the individual characters and put
 751              the element definitions in the list.  */
 752           const char *cp = arg->val.str.startmb;
 753           int cnt = 0;
 754           struct element_t *charelem;
 755           struct element_t **weights = NULL;
 756           int max = 0;
 757
 758           if (*cp == '\0')
 759             {
 760               lr_error (ldfile, _("%s: empty weight string not allowed"),
 761                         "LC_COLLATE");
 762               lr_ignore_rest (ldfile, 0);
 763               break;
 764             }
 765
 766           do
 767             {
 768               if (*cp == '<')
 769                 {
 770                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 771                      the latter we have to unify the name.  */
 772                   const char *startp = ++cp;
 773                   size_t len;
 774
 775                   while (*cp != '>')
 776                     {
 777                       if (*cp == ldfile->escape_char)
 778                         ++cp;
 779                       if (*cp == '\0')
 780                         /* It's a syntax error.  */
 781                         goto syntax;
 782
 783                       ++cp;
 784                     }
 785
 786                   if (cp - startp == 5 && startp[0] == 'U'
 787                       && isxdigit (startp[1]) && isxdigit (startp[2])
 788                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 789                     {
 790                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 791                       char *newstr;
 792
 793                       newstr = (char *) xmalloc (10);
 794                       snprintf (newstr, 10, "U%08X", ucs4);
 795                       startp = newstr;
 796
 797                       len = 9;
 798                     }
 799                   else
 800                     len = cp - startp;
 801
 802                   charelem = find_element (ldfile, collate, startp, len);
 803                   ++cp;
 804                 }
 805               else
 806                 {
 807                   /* People really shouldn't use characters directly in
 808                      the string.  Especially since it's not really clear
 809                      what this means.  We interpret all characters in the
 810                      string as if that would be bsymbols.  Otherwise we
 811                      would have to match back to bsymbols somehow and this
 812                      is normally not what people normally expect.  */
 813                   charelem = find_element (ldfile, collate, cp++, 1);
 814                 }
 815
 816               if (charelem == NULL)
 817                 {
 818                   /* We ignore the rest of the line.  */
 819                   lr_ignore_rest (ldfile, 0);
 820                   break;
 821                 }
 822
 823               /* Add the pointer.  */
 824               if (cnt >= max)
 825                 {
 826                   struct element_t **newp;
 827                   max += 10;
 828                   newp = (struct element_t **)
 829                     alloca (max * sizeof (struct element_t *));
 830                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 831                   weights = newp;
 832                 }
 833               weights[cnt++] = charelem;
 834             }
 835           while (*cp != '\0');
 836
 837           /* Now store the information.  */
 838           elem->weights[weight_cnt].w = (struct element_t **)
 839             obstack_alloc (&collate->mempool,
 840                            cnt * sizeof (struct element_t *));
 841           memcpy (elem->weights[weight_cnt].w, weights,
 842                   cnt * sizeof (struct element_t *));
 843           elem->weights[weight_cnt].cnt = cnt;
 844
 845           /* We don't need the string anymore.  */
 846           free (arg->val.str.startmb);
 847         }
 848       else if (ellipsis != tok_none
 849                && (arg->tok == tok_ellipsis2
 850                    || arg->tok == tok_ellipsis3
 851                    || arg->tok == tok_ellipsis4))
 852         {
 853           /* It must be the same ellipsis as used in the initial column.  */
 854           if (arg->tok != ellipsis)
 855             lr_error (ldfile, _("\
 856 %s: weights must use the same ellipsis symbol as the name"),
 857                       "LC_COLLATE");
 858
 859           /* The weight for this level will depend on the element
 860              iterating over the range.  Put a placeholder.  */
 861           elem->weights[weight_cnt].w = (struct element_t **)
 862             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 863           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 864           elem->weights[weight_cnt].cnt = 1;
 865         }
 866       else
 867         {
 868         syntax:
 869           /* It's a syntax error.  */
 870           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 871           lr_ignore_rest (ldfile, 0);
 872           break;
 873         }
 874
 875       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 876       /* This better should be the end of the line or a semicolon.  */
 877       if (arg->tok == tok_semicolon)
 878         /* OK, ignore this and read the next token.  */
 879         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 880       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 881         {
 882           /* It's a syntax error.  */
 883           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 884           lr_ignore_rest (ldfile, 0);
 885           break;
 886         }
 887     }
 888   while (++weight_cnt < nrules);
 889
 890   if (weight_cnt < nrules)
 891     {
 892       /* This means the rest of the line uses the current element as
 893          the weight.  */
 894       do
 895         {
 896           elem->weights[weight_cnt].w = (struct element_t **)
 897             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 898           if (ellipsis == tok_none)
 899             elem->weights[weight_cnt].w[0] = elem;
 900           else
 901             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 902           elem->weights[weight_cnt].cnt = 1;
 903         }
 904       while (++weight_cnt < nrules);
 905     }
 906   else
 907     {
 908       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 909         {
 910           /* Too many rule values.  */
 911           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 912           lr_ignore_rest (ldfile, 0);
 913         }
 914       else
 915         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 916     }
 917 }
 918
 919
 920 static int
 921 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 922               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 923               struct localedef_t *result)
 924 {
 925   /* First find out what kind of symbol this is.  */
 926   struct charseq *seq;
 927   uint32_t wc;
 928   struct element_t *elem = NULL;
 929   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 930
 931   /* Try to find the character in the charmap.  */
 932   seq = charmap_find_value (charmap, symstr, symlen);
 933
 934   /* Determine the wide character.  */
 935   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 936     {
 937       wc = repertoire_find_value (repertoire, symstr, symlen);
 938       if (seq != NULL)
 939         seq->ucs4 = wc;
 940     }
 941   else
 942     wc = seq->ucs4;
 943
 944   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 945     {
 946       /* It's no character, so look through the collation elements and
 947          symbol list.  */
 948       void *ptr = elem;
 949       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 950         {
 951           void *result;
 952           struct symbol_t *sym = NULL;
 953
 954           /* It's also collation element.  Therefore it's either a
 955              collating symbol or it's a character which is not
 956              supported by the character set.  In the later case we
 957              simply create a dummy entry.  */
 958           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 959             {
 960               /* It's a collation symbol.  */
 961               sym = (struct symbol_t *) result;
 962
 963               elem = sym->order;
 964             }
 965
 966           if (elem == NULL)
 967             {
 968               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 969
 970               if (sym != NULL)
 971                 sym->order = elem;
 972               else
 973                 /* Enter a fake element in the sequence table.  This
 974                    won't cause anything in the output since there is
 975                    no multibyte or wide character associated with
 976                    it.  */
 977                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 978             }
 979         }
 980       else
 981         /* Copy the result back.  */
 982         elem = ptr;
 983     }
 984   else
 985     {
 986       /* Otherwise the symbols stands for a character.  */
 987       void *ptr = elem;
 988       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
 989         {
 990           uint32_t wcs[2] = { wc, 0 };
 991
 992           /* We have to allocate an entry.  */
 993           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 994                               seq != NULL ? seq->nbytes : 0,
 995                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 996                               symstr, symlen, 1);
 997
 998           /* And add it to the table.  */
 999           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1000             /* This cannot happen.  */
1001             assert (! "Internal error");
1002         }
1003       else
1004         {
1005           /* Copy the result back.  */
1006           elem = ptr;
1007
1008           /* Maybe the character was used before the definition.  In this case
1009              we have to insert the byte sequences now.  */
1010           if (elem->mbs == NULL && seq != NULL)
1011             {
1012               elem->mbs = obstack_copy0 (&collate->mempool,
1013                                          seq->bytes, seq->nbytes);
1014               elem->nmbs = seq->nbytes;
1015             }
1016
1017           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1018             {
1019               uint32_t wcs[2] = { wc, 0 };
1020
1021               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1022               elem->nwcs = 1;
1023             }
1024         }
1025     }
1026
1027   /* Test whether this element is not already in the list.  */
1028   if (elem->next != NULL || elem == collate->cursor)
1029     {
1030       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1031                 (int) symlen, symstr, elem->file, elem->line);
1032       lr_ignore_rest (ldfile, 0);
1033       return 1;
1034     }
1035
1036   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1037
1038   return 0;
1039 }
1040
1041
1042 static void
1043 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1044                  enum token_t ellipsis, const struct charmap_t *charmap,
1045                  struct repertoire_t *repertoire,
1046                  struct localedef_t *result)
1047 {
1048   struct element_t *startp;
1049   struct element_t *endp;
1050   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1051
1052   /* Unlink the entry added for the ellipsis.  */
1053   unlink_element (collate);
1054   startp = collate->cursor;
1055
1056   /* Process and add the end-entry.  */
1057   if (symstr != NULL
1058       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1059     /* Something went wrong with inserting the to-value.  This means
1060        we cannot process the ellipsis.  */
1061     return;
1062
1063   /* Reset the cursor.  */
1064   collate->cursor = startp;
1065
1066   /* Now we have to handle many different situations:
1067      - we have to distinguish between the three different ellipsis forms
1068      - the is the ellipsis at the beginning, in the middle, or at the end.
1069   */
1070   endp = collate->cursor->next;
1071   assert (symstr == NULL || endp != NULL);
1072
1073   /* XXX The following is probably very wrong since also collating symbols
1074      can appear in ranges.  But do we want/can refine the test for that?  */
1075 #if 0
1076   /* Both, the start and the end symbol, must stand for characters.  */
1077   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1078       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1079     {
1080       lr_error (ldfile, _("\
1081 %s: the start and the end symbol of a range must stand for characters"),
1082                 "LC_COLLATE");
1083       return;
1084     }
1085 #endif
1086
1087   if (ellipsis == tok_ellipsis3)
1088     {
1089       /* One requirement we make here: the length of the byte
1090          sequences for the first and end character must be the same.
1091          This is mainly to prevent unwanted effects and this is often
1092          not what is wanted.  */
1093       size_t len = (startp->mbs != NULL ? startp->nmbs
1094                     : (endp->mbs != NULL ? endp->nmbs : 0));
1095       char mbcnt[len + 1];
1096       char mbend[len + 1];
1097
1098       /* Well, this should be caught somewhere else already.  Just to
1099          make sure.  */
1100       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1101       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1102
1103       if (startp != NULL && endp != NULL
1104           && startp->mbs != NULL && endp->mbs != NULL
1105           && startp->nmbs != endp->nmbs)
1106         {
1107           lr_error (ldfile, _("\
1108 %s: byte sequences of first and last character must have the same length"),
1109                     "LC_COLLATE");
1110           return;
1111         }
1112
1113       /* Determine whether we have to generate multibyte sequences.  */
1114       if ((startp == NULL || startp->mbs != NULL)
1115           && (endp == NULL || endp->mbs != NULL))
1116         {
1117           int cnt;
1118           int ret;
1119
1120           /* Prepare the beginning byte sequence.  This is either from the
1121              beginning byte sequence or it is all nulls if it was an
1122              initial ellipsis.  */
1123           if (startp == NULL || startp->mbs == NULL)
1124             memset (mbcnt, '\0', len);
1125           else
1126             {
1127               memcpy (mbcnt, startp->mbs, len);
1128
1129               /* And increment it so that the value is the first one we will
1130                  try to insert.  */
1131               for (cnt = len - 1; cnt >= 0; --cnt)
1132                 if (++mbcnt[cnt] != '\0')
1133                   break;
1134             }
1135           mbcnt[len] = '\0';
1136
1137           /* And the end sequence.  */
1138           if (endp == NULL || endp->mbs == NULL)
1139             memset (mbend, '\0', len);
1140           else
1141             memcpy (mbend, endp->mbs, len);
1142           mbend[len] = '\0';
1143
1144           /* Test whether we have a correct range.  */
1145           ret = memcmp (mbcnt, mbend, len);
1146           if (ret >= 0)
1147             {
1148               if (ret > 0)
1149                 lr_error (ldfile, _("%s: byte sequence of first character of \
1150 range is not lower than that of the last character"), "LC_COLLATE");
1151               return;
1152             }
1153
1154           /* Generate the byte sequences data.  */
1155           while (1)
1156             {
1157               struct charseq *seq;
1158
1159               /* Quite a bit of work ahead.  We have to find the character
1160                  definition for the byte sequence and then determine the
1161                  wide character belonging to it.  */
1162               seq = charmap_find_symbol (charmap, mbcnt, len);
1163               if (seq != NULL)
1164                 {
1165                   struct element_t *elem;
1166                   size_t namelen;
1167
1168                   /* I don't think this can ever happen.  */
1169                   assert (seq->name != NULL);
1170                   namelen = strlen (seq->name);
1171
1172                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1173                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1174                                                        namelen);
1175
1176                   /* Now we are ready to insert the new value in the
1177                      sequence.  Find out whether the element is
1178                      already known.  */
1179                   void *ptr;
1180                   if (find_entry (&collate->seq_table, seq->name, namelen,
1181                                   &ptr) != 0)
1182                     {
1183                       uint32_t wcs[2] = { seq->ucs4, 0 };
1184
1185                       /* We have to allocate an entry.  */
1186                       elem = new_element (collate, mbcnt, len,
1187                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1188                                           ? NULL : wcs, seq->name,
1189                                           namelen, 1);
1190
1191                       /* And add it to the table.  */
1192                       if (insert_entry (&collate->seq_table, seq->name,
1193                                         namelen, elem) != 0)
1194                         /* This cannot happen.  */
1195                         assert (! "Internal error");
1196                     }
1197                   else
1198                     /* Copy the result.  */
1199                     elem = ptr;
1200
1201                   /* Test whether this element is not already in the list.  */
1202                   if (elem->next != NULL || (collate->cursor != NULL
1203                                              && elem->next == collate->cursor))
1204                     {
1205                       lr_error (ldfile, _("\
1206 order for `%.*s' already defined at %s:%Zu"),
1207                                 (int) namelen, seq->name,
1208                                 elem->file, elem->line);
1209                       goto increment;
1210                     }
1211
1212                   /* Enqueue the new element.  */
1213                   elem->last = collate->cursor;
1214                   if (collate->cursor == NULL)
1215                     elem->next = NULL;
1216                   else
1217                     {
1218                       elem->next = collate->cursor->next;
1219                       elem->last->next = elem;
1220                       if (elem->next != NULL)
1221                         elem->next->last = elem;
1222                     }
1223                   if (collate->start == NULL)
1224                     {
1225                       assert (collate->cursor == NULL);
1226                       collate->start = elem;
1227                     }
1228                   collate->cursor = elem;
1229
1230                  /* Add the weight value.  We take them from the
1231                     `ellipsis_weights' member of `collate'.  */
1232                   elem->weights = (struct element_list_t *)
1233                     obstack_alloc (&collate->mempool,
1234                                    nrules * sizeof (struct element_list_t));
1235                   for (cnt = 0; cnt < nrules; ++cnt)
1236                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1237                         && (collate->ellipsis_weight.weights[cnt].w[0]
1238                             == ELEMENT_ELLIPSIS2))
1239                       {
1240                         elem->weights[cnt].w = (struct element_t **)
1241                           obstack_alloc (&collate->mempool,
1242                                          sizeof (struct element_t *));
1243                         elem->weights[cnt].w[0] = elem;
1244                         elem->weights[cnt].cnt = 1;
1245                       }
1246                     else
1247                       {
1248                         /* Simply use the weight from `ellipsis_weight'.  */
1249                         elem->weights[cnt].w =
1250                           collate->ellipsis_weight.weights[cnt].w;
1251                         elem->weights[cnt].cnt =
1252                           collate->ellipsis_weight.weights[cnt].cnt;
1253                       }
1254                 }
1255
1256               /* Increment for the next round.  */
1257             increment:
1258               for (cnt = len - 1; cnt >= 0; --cnt)
1259                 if (++mbcnt[cnt] != '\0')
1260                   break;
1261
1262               /* Find out whether this was all.  */
1263               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1264                 /* Yep, that's all.  */
1265                 break;
1266             }
1267         }
1268     }
1269   else
1270     {
1271       /* For symbolic range we naturally must have a beginning and an
1272          end specified by the user.  */
1273       if (startp == NULL)
1274         lr_error (ldfile, _("\
1275 %s: symbolic range ellipsis must not directly follow `order_start'"),
1276                   "LC_COLLATE");
1277       else if (endp == NULL)
1278         lr_error (ldfile, _("\
1279 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1280                   "LC_COLLATE");
1281       else
1282         {
1283           /* Determine the range.  To do so we have to determine the
1284              common prefix of the both names and then the numeric
1285              values of both ends.  */
1286           size_t lenfrom = strlen (startp->name);
1287           size_t lento = strlen (endp->name);
1288           char buf[lento + 1];
1289           int preflen = 0;
1290           long int from;
1291           long int to;
1292           char *cp;
1293           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1294
1295           if (lenfrom != lento)
1296             {
1297             invalid_range:
1298               lr_error (ldfile, _("\
1299 `%s' and `%.*s' are not valid names for symbolic range"),
1300                         startp->name, (int) lento, endp->name);
1301               return;
1302             }
1303
1304           while (startp->name[preflen] == endp->name[preflen])
1305             if (startp->name[preflen] == '\0')
1306               /* Nothing to be done.  The start and end point are identical
1307                  and while inserting the end point we have already given
1308                  the user an error message.  */
1309               return;
1310             else
1311               ++preflen;
1312
1313           errno = 0;
1314           from = strtol (startp->name + preflen, &cp, base);
1315           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1316             goto invalid_range;
1317
1318           errno = 0;
1319           to = strtol (endp->name + preflen, &cp, base);
1320           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1321             goto invalid_range;
1322
1323           /* Copy the prefix.  */
1324           memcpy (buf, startp->name, preflen);
1325
1326           /* Loop over all values.  */
1327           for (++from; from < to; ++from)
1328             {
1329               struct element_t *elem = NULL;
1330               struct charseq *seq;
1331               uint32_t wc;
1332               int cnt;
1333
1334               /* Generate the name.  */
1335               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1336                        (int) (lenfrom - preflen), from);
1337
1338               /* Look whether this name is already defined.  */
1339               void *ptr;
1340               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1341                 {
1342                   /* Copy back the result.  */
1343                   elem = ptr;
1344
1345                   if (elem->next != NULL || (collate->cursor != NULL
1346                                              && elem->next == collate->cursor))
1347                     {
1348                       lr_error (ldfile, _("\
1349 %s: order for `%.*s' already defined at %s:%Zu"),
1350                                 "LC_COLLATE", (int) lenfrom, buf,
1351                                 elem->file, elem->line);
1352                       continue;
1353                     }
1354
1355                   if (elem->name == NULL)
1356                     {
1357                       lr_error (ldfile, _("%s: `%s' must be a character"),
1358                                 "LC_COLLATE", buf);
1359                       continue;
1360                     }
1361                 }
1362
1363               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1364                 {
1365                   /* Search for a character of this name.  */
1366                   seq = charmap_find_value (charmap, buf, lenfrom);
1367                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1368                     {
1369                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1370
1371                       if (seq != NULL)
1372                         seq->ucs4 = wc;
1373                     }
1374                   else
1375                     wc = seq->ucs4;
1376
1377                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1378                     /* We don't know anything about a character with this
1379                        name.  XXX Should we warn?  */
1380                     continue;
1381
1382                   if (elem == NULL)
1383                     {
1384                       uint32_t wcs[2] = { wc, 0 };
1385
1386                       /* We have to allocate an entry.  */
1387                       elem = new_element (collate,
1388                                           seq != NULL ? seq->bytes : NULL,
1389                                           seq != NULL ? seq->nbytes : 0,
1390                                           wc == ILLEGAL_CHAR_VALUE
1391                                           ? NULL : wcs, buf, lenfrom, 1);
1392                     }
1393                   else
1394                     {
1395                       /* Update the element.  */
1396                       if (seq != NULL)
1397                         {
1398                           elem->mbs = obstack_copy0 (&collate->mempool,
1399                                                      seq->bytes, seq->nbytes);
1400                           elem->nmbs = seq->nbytes;
1401                         }
1402
1403                       if (wc != ILLEGAL_CHAR_VALUE)
1404                         {
1405                           uint32_t zero = 0;
1406
1407                           obstack_grow (&collate->mempool,
1408                                         &wc, sizeof (uint32_t));
1409                           obstack_grow (&collate->mempool,
1410                                         &zero, sizeof (uint32_t));
1411                           elem->wcs = obstack_finish (&collate->mempool);
1412                           elem->nwcs = 1;
1413                         }
1414                     }
1415
1416                   elem->file = ldfile->fname;
1417                   elem->line = ldfile->lineno;
1418                   elem->section = collate->current_section;
1419                 }
1420
1421               /* Enqueue the new element.  */
1422               elem->last = collate->cursor;
1423               elem->next = collate->cursor->next;
1424               elem->last->next = elem;
1425               if (elem->next != NULL)
1426                 elem->next->last = elem;
1427               collate->cursor = elem;
1428
1429               /* Now add the weights.  They come from the `ellipsis_weights'
1430                  member of `collate'.  */
1431               elem->weights = (struct element_list_t *)
1432                 obstack_alloc (&collate->mempool,
1433                                nrules * sizeof (struct element_list_t));
1434               for (cnt = 0; cnt < nrules; ++cnt)
1435                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1436                     && (collate->ellipsis_weight.weights[cnt].w[0]
1437                         == ELEMENT_ELLIPSIS2))
1438                   {
1439                     elem->weights[cnt].w = (struct element_t **)
1440                       obstack_alloc (&collate->mempool,
1441                                      sizeof (struct element_t *));
1442                     elem->weights[cnt].w[0] = elem;
1443                     elem->weights[cnt].cnt = 1;
1444                   }
1445                 else
1446                   {
1447                     /* Simly use the weight from `ellipsis_weight'.  */
1448                     elem->weights[cnt].w =
1449                       collate->ellipsis_weight.weights[cnt].w;
1450                     elem->weights[cnt].cnt =
1451                       collate->ellipsis_weight.weights[cnt].cnt;
1452                   }
1453             }
1454         }
1455     }
1456 }
1457
1458
1459 static void
1460 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1461                  struct localedef_t *copy_locale, int ignore_content)
1462 {
1463   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1464     {
1465       struct locale_collate_t *collate;
1466
1467       if (copy_locale == NULL)
1468         {
1469           collate = locale->categories[LC_COLLATE].collate =
1470             (struct locale_collate_t *)
1471             xcalloc (1, sizeof (struct locale_collate_t));
1472
1473           /* Init the various data structures.  */
1474           init_hash (&collate->elem_table, 100);
1475           init_hash (&collate->sym_table, 100);
1476           init_hash (&collate->seq_table, 500);
1477           obstack_init (&collate->mempool);
1478
1479           collate->col_weight_max = -1;
1480         }
1481       else
1482         /* Reuse the copy_locale's data structures.  */
1483         collate = locale->categories[LC_COLLATE].collate =
1484           copy_locale->categories[LC_COLLATE].collate;
1485     }
1486
1487   ldfile->translate_strings = 0;
1488   ldfile->return_widestr = 0;
1489 }
1490
1491
1492 void
1493 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1494 {
1495   /* Now is the time when we can assign the individual collation
1496      values for all the symbols.  We have possibly different values
1497      for the wide- and the multibyte-character symbols.  This is done
1498      since it might make a difference in the encoding if there is in
1499      some cases no multibyte-character but there are wide-characters.
1500      (The other way around it is not important since the encoded
1501      collation value in the wide-character case is 32 bits wide and
1502      therefore requires no encoding).
1503
1504      The lowest collation value assigned is 2.  Zero is reserved for
1505      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1506      functions and 1 is used to separate the individual passes for the
1507      different rules.
1508
1509      We also have to construct a list with all the bytes/words which
1510      can come first in a sequence, followed by all the elements which
1511      also start with this byte/word.  The order is reverse which has
1512      among others the important effect that longer strings are located
1513      first in the list.  This is required for the output data since
1514      the algorithm used in `strcoll' etc depends on this.
1515
1516      The multibyte case is easy.  We simply sort into an array with
1517      256 elements.  */
1518   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1519   int mbact[nrules];
1520   int wcact;
1521   int mbseqact;
1522   int wcseqact;
1523   struct element_t *runp;
1524   int i;
1525   int need_undefined = 0;
1526   struct section_list *sect;
1527   int ruleidx;
1528   int nr_wide_elems = 0;
1529
1530   if (collate == NULL)
1531     {
1532       /* No data, no check.  */
1533       if (! be_quiet)
1534         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1535                                 "LC_COLLATE"));
1536       return;
1537     }
1538
1539   /* If this assertion is hit change the type in `element_t'.  */
1540   assert (nrules <= sizeof (runp->used_in_level) * 8);
1541
1542   /* Make sure that the `position' rule is used either in all sections
1543      or in none.  */
1544   for (i = 0; i < nrules; ++i)
1545     for (sect = collate->sections; sect != NULL; sect = sect->next)
1546       if (sect->rules != NULL
1547           && ((sect->rules[i] & sort_position)
1548               != (collate->sections->rules[i] & sort_position)))
1549         {
1550           WITH_CUR_LOCALE (error (0, 0, _("\
1551 %s: `position' must be used for a specific level in all sections or none"),
1552                                   "LC_COLLATE"));
1553           break;
1554         }
1555
1556   /* Find out which elements are used at which level.  At the same
1557      time we find out whether we have any undefined symbols.  */
1558   runp = collate->start;
1559   while (runp != NULL)
1560     {
1561       if (runp->mbs != NULL)
1562         {
1563           for (i = 0; i < nrules; ++i)
1564             {
1565               int j;
1566
1567               for (j = 0; j < runp->weights[i].cnt; ++j)
1568                 /* A NULL pointer as the weight means IGNORE.  */
1569                 if (runp->weights[i].w[j] != NULL)
1570                   {
1571                     if (runp->weights[i].w[j]->weights == NULL)
1572                       {
1573                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1574                                                         runp->line,
1575                                                         _("symbol `%s' not defined"),
1576                                                         runp->weights[i].w[j]->name));
1577
1578                         need_undefined = 1;
1579                         runp->weights[i].w[j] = &collate->undefined;
1580                       }
1581                     else
1582                       /* Set the bit for the level.  */
1583                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1584                   }
1585             }
1586         }
1587
1588       /* Up to the next entry.  */
1589       runp = runp->next;
1590     }
1591
1592   /* Walk through the list of defined sequences and assign weights.  Also
1593      create the data structure which will allow generating the single byte
1594      character based tables.
1595
1596      Since at each time only the weights for each of the rules are
1597      only compared to other weights for this rule it is possible to
1598      assign more compact weight values than simply counting all
1599      weights in sequence.  We can assign weights from 3, one for each
1600      rule individually and only for those elements, which are actually
1601      used for this rule.
1602
1603      Why is this important?  It is not for the wide char table.  But
1604      it is for the singlebyte output since here larger numbers have to
1605      be encoded to make it possible to emit the value as a byte
1606      string.  */
1607   for (i = 0; i < nrules; ++i)
1608     mbact[i] = 2;
1609   wcact = 2;
1610   mbseqact = 0;
1611   wcseqact = 0;
1612   runp = collate->start;
1613   while (runp != NULL)
1614     {
1615       /* Determine the order.  */
1616       if (runp->used_in_level != 0)
1617         {
1618           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1619                                                  nrules * sizeof (int));
1620
1621           for (i = 0; i < nrules; ++i)
1622             if ((runp->used_in_level & (1 << i)) != 0)
1623               runp->mborder[i] = mbact[i]++;
1624             else
1625               runp->mborder[i] = 0;
1626         }
1627
1628       if (runp->mbs != NULL)
1629         {
1630           struct element_t **eptr;
1631           struct element_t *lastp = NULL;
1632
1633           /* Find the point where to insert in the list.  */
1634           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1635           while (*eptr != NULL)
1636             {
1637               if ((*eptr)->nmbs < runp->nmbs)
1638                 break;
1639
1640               if ((*eptr)->nmbs == runp->nmbs)
1641                 {
1642                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1643
1644                   if (c == 0)
1645                     {
1646                       /* This should not happen.  It means that we have
1647                          to symbols with the same byte sequence.  It is
1648                          of course an error.  */
1649                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1650                                                       (*eptr)->line,
1651                                                       _("\
1652 symbol `%s' has the same encoding as"), (*eptr)->name);
1653                                        error_at_line (0, 0, runp->file,
1654                                                       runp->line,
1655                                                       _("symbol `%s'"),
1656                                                       runp->name));
1657                       goto dont_insert;
1658                     }
1659                   else if (c < 0)
1660                     /* Insert it here.  */
1661                     break;
1662                 }
1663
1664               /* To the next entry.  */
1665               lastp = *eptr;
1666               eptr = &(*eptr)->mbnext;
1667             }
1668
1669           /* Set the pointers.  */
1670           runp->mbnext = *eptr;
1671           runp->mblast = lastp;
1672           if (*eptr != NULL)
1673             (*eptr)->mblast = runp;
1674           *eptr = runp;
1675         dont_insert:
1676           ;
1677         }
1678
1679       if (runp->used_in_level)
1680         {
1681           runp->wcorder = wcact++;
1682
1683           /* We take the opportunity to count the elements which have
1684              wide characters.  */
1685           ++nr_wide_elems;
1686         }
1687
1688       if (runp->is_character)
1689         {
1690           if (runp->nmbs == 1)
1691             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1692
1693           runp->wcseqorder = wcseqact++;
1694         }
1695       else if (runp->mbs != NULL && runp->weights != NULL)
1696         /* This is for collation elements.  */
1697         runp->wcseqorder = wcseqact++;
1698
1699       /* Up to the next entry.  */
1700       runp = runp->next;
1701     }
1702
1703   /* Find out whether any of the `mbheads' entries is unset.  In this
1704      case we use the UNDEFINED entry.  */
1705   for (i = 1; i < 256; ++i)
1706     if (collate->mbheads[i] == NULL)
1707       {
1708         need_undefined = 1;
1709         collate->mbheads[i] = &collate->undefined;
1710       }
1711
1712   /* Now to the wide character case.  */
1713   collate->wcheads.p = 6;
1714   collate->wcheads.q = 10;
1715   wchead_table_init (&collate->wcheads);
1716
1717   collate->wcseqorder.p = 6;
1718   collate->wcseqorder.q = 10;
1719   collseq_table_init (&collate->wcseqorder);
1720
1721   /* Start adding.  */
1722   runp = collate->start;
1723   while (runp != NULL)
1724     {
1725       if (runp->wcs != NULL)
1726         {
1727           struct element_t *e;
1728           struct element_t **eptr;
1729           struct element_t *lastp;
1730
1731           /* Insert the collation sequence value.  */
1732           if (runp->is_character)
1733             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1734                                runp->wcseqorder);
1735
1736           /* Find the point where to insert in the list.  */
1737           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1738           eptr = &e;
1739           lastp = NULL;
1740           while (*eptr != NULL)
1741             {
1742               if ((*eptr)->nwcs < runp->nwcs)
1743                 break;
1744
1745               if ((*eptr)->nwcs == runp->nwcs)
1746                 {
1747                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1748                                    (wchar_t *) runp->wcs, runp->nwcs);
1749
1750                   if (c == 0)
1751                     {
1752                       /* This should not happen.  It means that we have
1753                          two symbols with the same byte sequence.  It is
1754                          of course an error.  */
1755                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1756                                                       (*eptr)->line,
1757                                                       _("\
1758 symbol `%s' has the same encoding as"), (*eptr)->name);
1759                                        error_at_line (0, 0, runp->file,
1760                                                       runp->line,
1761                                                       _("symbol `%s'"),
1762                                                       runp->name));
1763                       goto dont_insertwc;
1764                     }
1765                   else if (c < 0)
1766                     /* Insert it here.  */
1767                     break;
1768                 }
1769
1770               /* To the next entry.  */
1771               lastp = *eptr;
1772               eptr = &(*eptr)->wcnext;
1773             }
1774
1775           /* Set the pointers.  */
1776           runp->wcnext = *eptr;
1777           runp->wclast = lastp;
1778           if (*eptr != NULL)
1779             (*eptr)->wclast = runp;
1780           *eptr = runp;
1781           if (eptr == &e)
1782             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1783         dont_insertwc:
1784           ;
1785         }
1786
1787       /* Up to the next entry.  */
1788       runp = runp->next;
1789     }
1790
1791   collseq_table_finalize (&collate->wcseqorder);
1792
1793   /* Now determine whether the UNDEFINED entry is needed and if yes,
1794      whether it was defined.  */
1795   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1796   if (collate->undefined.file == NULL)
1797     {
1798       if (need_undefined)
1799         {
1800           /* This seems not to be enforced by recent standards.  Don't
1801              emit an error, simply append UNDEFINED at the end.  */
1802           if (0)
1803             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1804
1805           /* Add UNDEFINED at the end.  */
1806           collate->undefined.mborder =
1807             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1808
1809           for (i = 0; i < nrules; ++i)
1810             collate->undefined.mborder[i] = mbact[i]++;
1811         }
1812
1813       /* In any case we will need the definition for the wide character
1814          case.  But we will not complain that it is missing since the
1815          specification strangely enough does not seem to account for
1816          this.  */
1817       collate->undefined.wcorder = wcact++;
1818     }
1819
1820   /* Finally, try to unify the rules for the sections.  Whenever the rules
1821      for a section are the same as those for another section give the
1822      ruleset the same index.  Since there are never many section we can
1823      use an O(n^2) algorithm here.  */
1824   sect = collate->sections;
1825   while (sect != NULL && sect->rules == NULL)
1826     sect = sect->next;
1827
1828   /* Bail out if we have no sections because of earlier errors.  */
1829   if (sect == NULL)
1830     {
1831       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1832                               _("too many errors; giving up")));
1833       return;
1834     }
1835
1836   ruleidx = 0;
1837   do
1838     {
1839       struct section_list *osect = collate->sections;
1840
1841       while (osect != sect)
1842         if (osect->rules != NULL
1843             && memcmp (osect->rules, sect->rules, nrules) == 0)
1844           break;
1845         else
1846           osect = osect->next;
1847
1848       if (osect == sect)
1849         sect->ruleidx = ruleidx++;
1850       else
1851         sect->ruleidx = osect->ruleidx;
1852
1853       /* Next section.  */
1854       do
1855         sect = sect->next;
1856       while (sect != NULL && sect->rules == NULL);
1857     }
1858   while (sect != NULL);
1859   /* We are currently not prepared for more than 128 rulesets.  But this
1860      should never really be a problem.  */
1861   assert (ruleidx <= 128);
1862 }
1863
1864
1865 static int32_t
1866 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1867                struct element_t *elem)
1868 {
1869   size_t cnt;
1870   int32_t retval;
1871
1872   /* Optimize the use of UNDEFINED.  */
1873   if (elem == &collate->undefined)
1874     /* The weights are already inserted.  */
1875     return 0;
1876
1877   /* This byte can start exactly one collation element and this is
1878      a single byte.  We can directly give the index to the weights.  */
1879   retval = obstack_object_size (pool);
1880
1881   /* Construct the weight.  */
1882   for (cnt = 0; cnt < nrules; ++cnt)
1883     {
1884       char buf[elem->weights[cnt].cnt * 7];
1885       int len = 0;
1886       int i;
1887
1888       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1889         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1890         if (elem->weights[cnt].w[i] != NULL)
1891           len += utf8_encode (&buf[len],
1892                               elem->weights[cnt].w[i]->mborder[cnt]);
1893
1894       /* And add the buffer content.  */
1895       obstack_1grow (pool, len);
1896       obstack_grow (pool, buf, len);
1897     }
1898
1899   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1900 }
1901
1902
1903 static int32_t
1904 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1905                  struct element_t *elem)
1906 {
1907   size_t cnt;
1908   int32_t retval;
1909
1910   /* Optimize the use of UNDEFINED.  */
1911   if (elem == &collate->undefined)
1912     /* The weights are already inserted.  */
1913     return 0;
1914
1915   /* This byte can start exactly one collation element and this is
1916      a single byte.  We can directly give the index to the weights.  */
1917   retval = obstack_object_size (pool) / sizeof (int32_t);
1918
1919   /* Construct the weight.  */
1920   for (cnt = 0; cnt < nrules; ++cnt)
1921     {
1922       int32_t buf[elem->weights[cnt].cnt];
1923       int i;
1924       int32_t j;
1925
1926       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1927         if (elem->weights[cnt].w[i] != NULL)
1928           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1929
1930       /* And add the buffer content.  */
1931       obstack_int32_grow (pool, j);
1932
1933       obstack_grow (pool, buf, j * sizeof (int32_t));
1934     }
1935
1936   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1937 }
1938
/* Context for add_to_tablewc.  That function is handed to
   wchead_table_iterate as a callback and only receives the character
   and the list head, so everything else it needs is passed through
   this file-scope variable.  collate_output fills it in before the
   iteration and clears it again afterwards.

   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to output_weightwc; receives the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the sequence entries written by add_to_tablewc.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed runs.  */
  struct obstack *indpool;
  /* Collation data handed to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table that gets one entry per leading wide character.  */
  struct collidx_table *tablewc;
} atwc;
1948
1949 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1950
1951 static void
1952 add_to_tablewc (uint32_t ch, struct element_t *runp)
1953 {
1954   if (runp->wcnext == NULL && runp->nwcs == 1)
1955     {
1956       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1957                                            runp);
1958       collidx_table_add (atwc.tablewc, ch, weigthidx);
1959     }
1960   else
1961     {
1962       /* As for the singlebyte table, we recognize sequences and
1963          compress them.  */
1964       struct element_t *lastp;
1965
1966       collidx_table_add (atwc.tablewc, ch,
1967                          -(obstack_object_size (atwc.extrapool)
1968                          / sizeof (uint32_t)));
1969
1970       do
1971         {
1972           /* Store the current index in the weight table.  We know that
1973              the current position in the `extrapool' is aligned on a
1974              32-bit address.  */
1975           int32_t weightidx;
1976           int added;
1977
1978           /* Find out wether this is a single entry or we have more than
1979              one consecutive entry.  */
1980           if (runp->wcnext != NULL
1981               && runp->nwcs == runp->wcnext->nwcs
1982               && wmemcmp ((wchar_t *) runp->wcs,
1983                           (wchar_t *)runp->wcnext->wcs,
1984                           runp->nwcs - 1) == 0
1985               && (runp->wcs[runp->nwcs - 1]
1986                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
1987             {
1988               int i;
1989               struct element_t *series_startp = runp;
1990               struct element_t *curp;
1991
1992               /* Now add first the initial byte sequence.  */
1993               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
1994               if (sizeof (int32_t) == sizeof (int))
1995                 obstack_make_room (atwc.extrapool, added);
1996
1997               /* More than one consecutive entry.  We mark this by having
1998                  a negative index into the indirect table.  */
1999               obstack_int32_grow_fast (atwc.extrapool,
2000                                        -(obstack_object_size (atwc.indpool)
2001                                          / sizeof (int32_t)));
2002               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2003
2004               do
2005                 runp = runp->wcnext;
2006               while (runp->wcnext != NULL
2007                      && runp->nwcs == runp->wcnext->nwcs
2008                      && wmemcmp ((wchar_t *) runp->wcs,
2009                                  (wchar_t *)runp->wcnext->wcs,
2010                                  runp->nwcs - 1) == 0
2011                      && (runp->wcs[runp->nwcs - 1]
2012                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2013
2014               /* Now walk backward from here to the beginning.  */
2015               curp = runp;
2016
2017               for (i = 1; i < runp->nwcs; ++i)
2018                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2019
2020               /* Now find the end of the consecutive sequence and
2021                  add all the indeces in the indirect pool.  */
2022               do
2023                 {
2024                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2025                                                curp);
2026                   obstack_int32_grow (atwc.indpool, weightidx);
2027
2028                   curp = curp->wclast;
2029                 }
2030               while (curp != series_startp);
2031
2032               /* Add the final weight.  */
2033               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2034                                            curp);
2035               obstack_int32_grow (atwc.indpool, weightidx);
2036
2037               /* And add the end byte sequence.  Without length this
2038                  time.  */
2039               for (i = 1; i < curp->nwcs; ++i)
2040                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2041             }
2042           else
2043             {
2044               /* A single entry.  Simply add the index and the length and
2045                  string (except for the first character which is already
2046                  tested for).  */
2047               int i;
2048
2049               /* Output the weight info.  */
2050               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2051                                            runp);
2052
2053               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2054               if (sizeof (int) == sizeof (int32_t))
2055                 obstack_make_room (atwc.extrapool, added);
2056
2057               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2058               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2059               for (i = 1; i < runp->nwcs; ++i)
2060                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2061             }
2062
2063           /* Next entry.  */
2064           lastp = runp;
2065           runp = runp->wcnext;
2066         }
2067       while (runp != NULL);
2068     }
2069 }
2070
2071 void
2072 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2073                 const char *output_path)
2074 {
2075   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2076   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2077   struct iovec iov[2 + nelems];
2078   struct locale_file data;
2079   uint32_t idx[nelems];
2080   size_t cnt;
2081   size_t ch;
2082   int32_t tablemb[256];
2083   struct obstack weightpool;
2084   struct obstack extrapool;
2085   struct obstack indirectpool;
2086   struct section_list *sect;
2087   struct collidx_table tablewc;
2088   uint32_t elem_size;
2089   uint32_t *elem_table;
2090   int i;
2091   struct element_t *runp;
2092
2093   data.magic = LIMAGIC (LC_COLLATE);
2094   data.n = nelems;
2095   iov[0].iov_base = (void *) &data;
2096   iov[0].iov_len = sizeof (data);
2097
2098   iov[1].iov_base = (void *) idx;
2099   iov[1].iov_len = sizeof (idx);
2100
2101   idx[0] = iov[0].iov_len + iov[1].iov_len;
2102   cnt = 0;
2103
2104   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2105   iov[2 + cnt].iov_base = &nrules;
2106   iov[2 + cnt].iov_len = sizeof (uint32_t);
2107   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2108   ++cnt;
2109
2110   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2111   if (collate == NULL)
2112     {
2113       int32_t dummy = 0;
2114
2115       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2116         {
2117           /* The words have to be handled specially.  */
2118           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2119             {
2120               iov[2 + cnt].iov_base = &dummy;
2121               iov[2 + cnt].iov_len = sizeof (int32_t);
2122             }
2123           else
2124             {
2125               iov[2 + cnt].iov_base = NULL;
2126               iov[2 + cnt].iov_len = 0;
2127             }
2128
2129           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2130             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2131           ++cnt;
2132         }
2133
2134       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2135
2136       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2137
2138       return;
2139     }
2140
2141   obstack_init (&weightpool);
2142   obstack_init (&extrapool);
2143   obstack_init (&indirectpool);
2144
2145   /* Since we are using the sign of an integer to mark indirection the
2146      offsets in the arrays we are indirectly referring to must not be
2147      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2148   obstack_int32_grow (&extrapool, 0);
2149   obstack_int32_grow (&indirectpool, 0);
2150
2151   /* Prepare the ruleset table.  */
2152   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2153     if (sect->rules != NULL && sect->ruleidx == i)
2154       {
2155         int j;
2156
2157         obstack_make_room (&weightpool, nrules);
2158
2159         for (j = 0; j < nrules; ++j)
2160           obstack_1grow_fast (&weightpool, sect->rules[j]);
2161         ++i;
2162       }
2163   /* And align the output.  */
2164   i = (nrules * i) % __alignof__ (int32_t);
2165   if (i > 0)
2166     do
2167       obstack_1grow (&weightpool, '\0');
2168     while (++i < __alignof__ (int32_t));
2169
2170   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2171   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2172   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2173   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2174   ++cnt;
2175
2176   /* Generate the 8-bit table.  Walk through the lists of sequences
2177      starting with the same byte and add them one after the other to
2178      the table.  In case we have more than one sequence starting with
2179      the same byte we have to use extra indirection.
2180
2181      First add a record for the NUL byte.  This entry will never be used
2182      so it does not matter.  */
2183   tablemb[0] = 0;
2184
2185   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2186      will probably be used more than once it is good to store the
2187      weights only once.  */
2188   if (collate->undefined.used_in_level != 0)
2189     output_weight (&weightpool, collate, &collate->undefined);
2190
2191   for (ch = 1; ch < 256; ++ch)
2192     if (collate->mbheads[ch]->mbnext == NULL
2193         && collate->mbheads[ch]->nmbs <= 1)
2194       {
2195         tablemb[ch] = output_weight (&weightpool, collate,
2196                                      collate->mbheads[ch]);
2197       }
2198     else
2199       {
2200         /* The entries in the list are sorted by length and then
2201            alphabetically.  This is the order in which we will add the
2202            elements to the collation table.  This allows simply walking
2203            the table in sequence and stopping at the first matching
2204            entry.  Since the longer sequences are coming first in the
2205            list they have the possibility to match first, just as it
2206            has to be.  In the worst case we are walking to the end of
2207            the list where we put, if no singlebyte sequence is defined
2208            in the locale definition, the weights for UNDEFINED.
2209
2210            To reduce the length of the search list we compress them a bit.
2211            This happens by collecting sequences of consecutive byte
2212            sequences in one entry (having and begin and end byte sequence)
2213            and add only one index into the weight table.  We can find the
2214            consecutive entries since they are also consecutive in the list.  */
2215         struct element_t *runp = collate->mbheads[ch];
2216         struct element_t *lastp;
2217
2218         assert ((obstack_object_size (&extrapool)
2219                  & (__alignof__ (int32_t) - 1)) == 0);
2220
2221         tablemb[ch] = -obstack_object_size (&extrapool);
2222
2223         do
2224           {
2225             /* Store the current index in the weight table.  We know that
2226                the current position in the `extrapool' is aligned on a
2227                32-bit address.  */
2228             int32_t weightidx;
2229             int added;
2230
2231             /* Find out wether this is a single entry or we have more than
2232                one consecutive entry.  */
2233             if (runp->mbnext != NULL
2234                 && runp->nmbs == runp->mbnext->nmbs
2235                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2236                 && (runp->mbs[runp->nmbs - 1]
2237                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2238               {
2239                 int i;
2240                 struct element_t *series_startp = runp;
2241                 struct element_t *curp;
2242
2243                 /* Compute how much space we will need.  */
2244                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2245                           + __alignof__ (int32_t) - 1)
2246                          & ~(__alignof__ (int32_t) - 1));
2247                 assert ((obstack_object_size (&extrapool)
2248                          & (__alignof__ (int32_t) - 1)) == 0);
2249                 obstack_make_room (&extrapool, added);
2250
2251                 /* More than one consecutive entry.  We mark this by having
2252                    a negative index into the indirect table.  */
2253                 obstack_int32_grow_fast (&extrapool,
2254                                          -(obstack_object_size (&indirectpool)
2255                                            / sizeof (int32_t)));
2256
2257                 /* Now search first the end of the series.  */
2258                 do
2259                   runp = runp->mbnext;
2260                 while (runp->mbnext != NULL
2261                        && runp->nmbs == runp->mbnext->nmbs
2262                        && memcmp (runp->mbs, runp->mbnext->mbs,
2263                                   runp->nmbs - 1) == 0
2264                        && (runp->mbs[runp->nmbs - 1]
2265                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2266
2267                 /* Now walk backward from here to the beginning.  */
2268                 curp = runp;
2269
2270                 assert (runp->nmbs <= 256);
2271                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2272                 for (i = 1; i < curp->nmbs; ++i)
2273                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2274
2275                 /* Now find the end of the consecutive sequence and
2276                    add all the indeces in the indirect pool.  */
2277                 do
2278                   {
2279                     weightidx = output_weight (&weightpool, collate, curp);
2280                     obstack_int32_grow (&indirectpool, weightidx);
2281
2282                     curp = curp->mblast;
2283                   }
2284                 while (curp != series_startp);
2285
2286                 /* Add the final weight.  */
2287                 weightidx = output_weight (&weightpool, collate, curp);
2288                 obstack_int32_grow (&indirectpool, weightidx);
2289
2290                 /* And add the end byte sequence.  Without length this
2291                    time.  */
2292                 for (i = 1; i < curp->nmbs; ++i)
2293                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2294               }
2295             else
2296               {
2297                 /* A single entry.  Simply add the index and the length and
2298                    string (except for the first character which is already
2299                    tested for).  */
2300                 int i;
2301
2302                 /* Output the weight info.  */
2303                 weightidx = output_weight (&weightpool, collate, runp);
2304
2305                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2306                           + __alignof__ (int32_t) - 1)
2307                          & ~(__alignof__ (int32_t) - 1));
2308                 assert ((obstack_object_size (&extrapool)
2309                          & (__alignof__ (int32_t) - 1)) == 0);
2310                 obstack_make_room (&extrapool, added);
2311
2312                 obstack_int32_grow_fast (&extrapool, weightidx);
2313                 assert (runp->nmbs <= 256);
2314                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2315
2316                 for (i = 1; i < runp->nmbs; ++i)
2317                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2318               }
2319
2320             /* Add alignment bytes if necessary.  */
2321             while ((obstack_object_size (&extrapool)
2322                     & (__alignof__ (int32_t) - 1)) != 0)
2323               obstack_1grow_fast (&extrapool, '\0');
2324
2325             /* Next entry.  */
2326             lastp = runp;
2327             runp = runp->mbnext;
2328           }
2329         while (runp != NULL);
2330
2331         assert ((obstack_object_size (&extrapool)
2332                  & (__alignof__ (int32_t) - 1)) == 0);
2333
2334         /* If the final entry in the list is not a single character we
2335            add an UNDEFINED entry here.  */
2336         if (lastp->nmbs != 1)
2337           {
2338             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2339                          & ~(__alignof__ (int32_t) - 1));
2340             obstack_make_room (&extrapool, added);
2341
2342             obstack_int32_grow_fast (&extrapool, 0);
2343             /* XXX What rule? We just pick the first.  */
2344             obstack_1grow_fast (&extrapool, 0);
2345             /* Length is zero.  */
2346             obstack_1grow_fast (&extrapool, 0);
2347
2348             /* Add alignment bytes if necessary.  */
2349             while ((obstack_object_size (&extrapool)
2350                     & (__alignof__ (int32_t) - 1)) != 0)
2351               obstack_1grow_fast (&extrapool, '\0');
2352           }
2353       }
2354
2355   /* Add padding to the tables if necessary.  */
2356   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2357          != 0)
2358     obstack_1grow (&weightpool, 0);
2359
2360   /* Now add the four tables.  */
2361   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2362   iov[2 + cnt].iov_base = tablemb;
2363   iov[2 + cnt].iov_len = sizeof (tablemb);
2364   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2365   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2366   ++cnt;
2367
2368   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2369   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2370   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2371   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2372   ++cnt;
2373
2374   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2375   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2376   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2377   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2378   ++cnt;
2379
2380   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2381   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2382   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2383   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2384   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2385   ++cnt;
2386
2387
2388   /* Now the same for the wide character table.  We need to store some
2389      more information here.  */
2390   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2391   iov[2 + cnt].iov_base = NULL;
2392   iov[2 + cnt].iov_len = 0;
2393   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2394   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2395   ++cnt;
2396
2397   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2398   iov[2 + cnt].iov_base = NULL;
2399   iov[2 + cnt].iov_len = 0;
2400   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2401   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2402   ++cnt;
2403
2404   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2405   iov[2 + cnt].iov_base = NULL;
2406   iov[2 + cnt].iov_len = 0;
2407   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2408   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2409   ++cnt;
2410
2411   /* Since we are using the sign of an integer to mark indirection the
2412      offsets in the arrays we are indirectly referring to must not be
2413      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2414   obstack_int32_grow (&extrapool, 0);
2415   obstack_int32_grow (&indirectpool, 0);
2416
2417   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2418      will probably be used more than once it is good to store the
2419      weights only once.  */
2420   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2421     abort ();
2422
2423   /* Generate the table.  Walk through the lists of sequences starting
2424      with the same wide character and add them one after the other to
2425      the table.  In case we have more than one sequence starting with
2426      the same byte we have to use extra indirection.  */
2427   tablewc.p = 6;
2428   tablewc.q = 10;
2429   collidx_table_init (&tablewc);
2430
2431   atwc.weightpool = &weightpool;
2432   atwc.extrapool = &extrapool;
2433   atwc.indpool = &indirectpool;
2434   atwc.collate = collate;
2435   atwc.tablewc = &tablewc;
2436
2437   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2438
2439   memset (&atwc, 0, sizeof (atwc));
2440
2441   collidx_table_finalize (&tablewc);
2442
2443   /* Now add the four tables.  */
2444   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2445   iov[2 + cnt].iov_base = tablewc.result;
2446   iov[2 + cnt].iov_len = tablewc.result_size;
2447   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2448   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2449   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2450   ++cnt;
2451
2452   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2453   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2454   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2455   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2456   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2457   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2458   ++cnt;
2459
2460   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2461   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2462   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2463   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2464   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2465   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2466   ++cnt;
2467
2468   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2469   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2470   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2471   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2472   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2473   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2474   ++cnt;
2475
2476
2477   /* Finally write the table with collation element names out.  It is
2478      a hash table with a simple function which gets the name of the
2479      character as the input.  One character might have many names.  The
2480      value associated with the name is an index into the weight table
2481      where we are then interested in the first-level weight value.
2482
2483      To determine how large the table should be we are counting the
2484      elements have to put in.  Since we are using internal chaining
2485      using a secondary hash function we have to make the table a bit
2486      larger to avoid extremely long search times.  We can achieve
2487      good results with a 40% larger table than there are entries.  */
2488   elem_size = 0;
2489   runp = collate->start;
2490   while (runp != NULL)
2491     {
2492       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2493         /* Yep, the element really counts.  */
2494         ++elem_size;
2495
2496       runp = runp->next;
2497     }
2498   /* Add 40% and find the next prime number.  */
2499   elem_size = next_prime (elem_size * 1.4);
2500
2501   /* Allocate the table.  Each entry consists of two words: the hash
2502      value and an index in a secondary table which provides the index
2503      into the weight table and the string itself (so that a match can
2504      be determined).  */
2505   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2506                                            elem_size * 2 * sizeof (uint32_t));
2507   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2508
2509   /* Now add the elements.  */
2510   runp = collate->start;
2511   while (runp != NULL)
2512     {
2513       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2514         {
2515           /* Compute the hash value of the name.  */
2516           uint32_t namelen = strlen (runp->name);
2517           uint32_t hash = elem_hash (runp->name, namelen);
2518           size_t idx = hash % elem_size;
2519           size_t start_idx = idx;
2520
2521           if (elem_table[idx * 2] != 0)
2522             {
2523               /* The spot is already taken.  Try iterating using the value
2524                  from the secondary hashing function.  */
2525               size_t iter = hash % (elem_size - 2) + 1;
2526
2527               do
2528                 {
2529                   idx += iter;
2530                   if (idx >= elem_size)
2531                     idx -= elem_size;
2532                   assert (idx != start_idx);
2533                 }
2534               while (elem_table[idx * 2] != 0);
2535             }
2536           /* This is the spot where we will insert the value.  */
2537           elem_table[idx * 2] = hash;
2538           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2539
2540           /* Then the string itself including length.  */
2541           obstack_1grow (&extrapool, namelen);
2542           obstack_grow (&extrapool, runp->name, namelen);
2543
2544           /* And the multibyte representation.  */
2545           obstack_1grow (&extrapool, runp->nmbs);
2546           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2547
2548           /* And align again to 32 bits.  */
2549           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2550             obstack_grow (&extrapool, "\0\0",
2551                           (sizeof (int32_t)
2552                            - ((1 + namelen + 1 + runp->nmbs)
2553                               % sizeof (int32_t))));
2554
2555           /* Now some 32-bit values: multibyte collation sequence,
2556              wide char string (including length), and wide char
2557              collation sequence.  */
2558           obstack_int32_grow (&extrapool, runp->mbseqorder);
2559
2560           obstack_int32_grow (&extrapool, runp->nwcs);
2561           obstack_grow (&extrapool, runp->wcs,
2562                         runp->nwcs * sizeof (uint32_t));
2563
2564           obstack_int32_grow (&extrapool, runp->wcseqorder);
2565         }
2566
2567       runp = runp->next;
2568     }
2569
2570   /* Prepare to write out this data.  */
2571   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2572   iov[2 + cnt].iov_base = &elem_size;
2573   iov[2 + cnt].iov_len = sizeof (int32_t);
2574   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2575   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2576   ++cnt;
2577
2578   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2579   iov[2 + cnt].iov_base = elem_table;
2580   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2581   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2582   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2583   ++cnt;
2584
2585   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2586   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2587   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2588   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2589   ++cnt;
2590
2591   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2592   iov[2 + cnt].iov_base = collate->mbseqorder;
2593   iov[2 + cnt].iov_len = 256;
2594   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2595   ++cnt;
2596
2597   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2598   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2599   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2600   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2601   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2602   ++cnt;
2603
2604   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2605   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2606   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2607   ++cnt;
2608
2609   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2610
2611   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2612
2613   obstack_free (&weightpool, NULL);
2614   obstack_free (&extrapool, NULL);
2615   obstack_free (&indirectpool, NULL);
2616 }
2617
2618
2619 void
2620 collate_read (struct linereader *ldfile, struct localedef_t *result,
2621               const struct charmap_t *charmap, const char *repertoire_name,
2622               int ignore_content)
2623 {
2624   struct repertoire_t *repertoire = NULL;
2625   struct locale_collate_t *collate;
2626   struct token *now;
2627   struct token *arg = NULL;
2628   enum token_t nowtok;
2629   enum token_t was_ellipsis = tok_none;
2630   struct localedef_t *copy_locale = NULL;
2631   /* Parsing state:
2632      0 - start
2633      1 - between `order-start' and `order-end'
2634      2 - after `order-end'
2635      3 - after `reorder-after', waiting for `reorder-end'
2636      4 - after `reorder-end'
2637      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2638      6 - after `reorder-sections-end'
2639   */
2640   int state = 0;
2641
2642   /* Get the repertoire we have to use.  */
2643   if (repertoire_name != NULL)
2644     repertoire = repertoire_read (repertoire_name);
2645
2646   /* The rest of the line containing `LC_COLLATE' must be free.  */
2647   lr_ignore_rest (ldfile, 1);
2648
2649   do
2650     {
2651       now = lr_token (ldfile, charmap, result, NULL, verbose);
2652       nowtok = now->tok;
2653     }
2654   while (nowtok == tok_eol);
2655
2656   if (nowtok == tok_copy)
2657     {
2658       state = 2;
2659       now = lr_token (ldfile, charmap, result, NULL, verbose);
2660       if (now->tok != tok_string)
2661         {
2662           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2663
2664         skip_category:
2665           do
2666             now = lr_token (ldfile, charmap, result, NULL, verbose);
2667           while (now->tok != tok_eof && now->tok != tok_end);
2668
2669           if (now->tok != tok_eof
2670               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2671                   now->tok == tok_eof))
2672             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2673           else if (now->tok != tok_lc_collate)
2674             {
2675               lr_error (ldfile, _("\
2676 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2677               lr_ignore_rest (ldfile, 0);
2678             }
2679           else
2680             lr_ignore_rest (ldfile, 1);
2681
2682           return;
2683         }
2684
2685       if (! ignore_content)
2686         {
2687           /* Get the locale definition.  */
2688           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2689                                      repertoire_name, charmap, NULL);
2690           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2691             {
2692               /* Not yet loaded.  So do it now.  */
2693               if (locfile_read (copy_locale, charmap) != 0)
2694                 goto skip_category;
2695             }
2696
2697           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2698             return;
2699         }
2700
2701       lr_ignore_rest (ldfile, 1);
2702
2703       now = lr_token (ldfile, charmap, result, NULL, verbose);
2704       nowtok = now->tok;
2705     }
2706
2707   /* Prepare the data structures.  */
2708   collate_startup (ldfile, result, copy_locale, ignore_content);
2709   collate = result->categories[LC_COLLATE].collate;
2710
2711   while (1)
2712     {
2713       char ucs4buf[10];
2714       char *symstr;
2715       size_t symlen;
2716
2717       /* Of course we don't proceed beyond the end of file.  */
2718       if (nowtok == tok_eof)
2719         break;
2720
2721       /* Ignore empty lines.  */
2722       if (nowtok == tok_eol)
2723         {
2724           now = lr_token (ldfile, charmap, result, NULL, verbose);
2725           nowtok = now->tok;
2726           continue;
2727         }
2728
2729       switch (nowtok)
2730         {
2731         case tok_copy:
2732           /* Allow copying other locales.  */
2733           now = lr_token (ldfile, charmap, result, NULL, verbose);
2734           if (now->tok != tok_string)
2735             goto err_label;
2736
2737           if (! ignore_content)
2738             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2739                          charmap, result);
2740
2741           lr_ignore_rest (ldfile, 1);
2742           break;
2743
2744         case tok_coll_weight_max:
2745           /* Ignore the rest of the line if we don't need the input of
2746              this line.  */
2747           if (ignore_content)
2748             {
2749               lr_ignore_rest (ldfile, 0);
2750               break;
2751             }
2752
2753           if (state != 0)
2754             goto err_label;
2755
2756           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2757           if (arg->tok != tok_number)
2758             goto err_label;
2759           if (collate->col_weight_max != -1)
2760             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2761                       "LC_COLLATE", "col_weight_max");
2762           else
2763             collate->col_weight_max = arg->val.num;
2764           lr_ignore_rest (ldfile, 1);
2765           break;
2766
2767         case tok_section_symbol:
2768           /* Ignore the rest of the line if we don't need the input of
2769              this line.  */
2770           if (ignore_content)
2771             {
2772               lr_ignore_rest (ldfile, 0);
2773               break;
2774             }
2775
2776           if (state != 0)
2777             goto err_label;
2778
2779           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2780           if (arg->tok != tok_bsymbol)
2781             goto err_label;
2782           else if (!ignore_content)
2783             {
2784               /* Check whether this section is already known.  */
2785               struct section_list *known = collate->sections;
2786               while (known != NULL)
2787                 {
2788                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2789                     break;
2790                   known = known->next;
2791                 }
2792
2793               if (known != NULL)
2794                 {
2795                   lr_error (ldfile,
2796                             _("%s: duplicate declaration of section `%s'"),
2797                             "LC_COLLATE", arg->val.str.startmb);
2798                   free (arg->val.str.startmb);
2799                 }
2800               else
2801                 collate->sections = make_seclist_elem (collate,
2802                                                        arg->val.str.startmb,
2803                                                        collate->sections);
2804
2805               lr_ignore_rest (ldfile, known == NULL);
2806             }
2807           else
2808             {
2809               free (arg->val.str.startmb);
2810               lr_ignore_rest (ldfile, 0);
2811             }
2812           break;
2813
2814         case tok_collating_element:
2815           /* Ignore the rest of the line if we don't need the input of
2816              this line.  */
2817           if (ignore_content)
2818             {
2819               lr_ignore_rest (ldfile, 0);
2820               break;
2821             }
2822
2823           if (state != 0 && state != 2)
2824             goto err_label;
2825
2826           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2827           if (arg->tok != tok_bsymbol)
2828             goto err_label;
2829           else
2830             {
2831               const char *symbol = arg->val.str.startmb;
2832               size_t symbol_len = arg->val.str.lenmb;
2833
2834               /* Next the `from' keyword.  */
2835               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2836               if (arg->tok != tok_from)
2837                 {
2838                   free ((char *) symbol);
2839                   goto err_label;
2840                 }
2841
2842               ldfile->return_widestr = 1;
2843               ldfile->translate_strings = 1;
2844
2845               /* Finally the string with the replacement.  */
2846               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2847
2848               ldfile->return_widestr = 0;
2849               ldfile->translate_strings = 0;
2850
2851               if (arg->tok != tok_string)
2852                 goto err_label;
2853
2854               if (!ignore_content && symbol != NULL)
2855                 {
2856                   /* The name is already defined.  */
2857                   if (check_duplicate (ldfile, collate, charmap,
2858                                        repertoire, symbol, symbol_len))
2859                     goto col_elem_free;
2860
2861                   if (arg->val.str.startmb != NULL)
2862                     insert_entry (&collate->elem_table, symbol, symbol_len,
2863                                   new_element (collate,
2864                                                arg->val.str.startmb,
2865                                                arg->val.str.lenmb - 1,
2866                                                arg->val.str.startwc,
2867                                                symbol, symbol_len, 0));
2868                 }
2869               else
2870                 {
2871                 col_elem_free:
2872                   if (symbol != NULL)
2873                     free ((char *) symbol);
2874                   if (arg->val.str.startmb != NULL)
2875                     free (arg->val.str.startmb);
2876                   if (arg->val.str.startwc != NULL)
2877                     free (arg->val.str.startwc);
2878                 }
2879               lr_ignore_rest (ldfile, 1);
2880             }
2881           break;
2882
2883         case tok_collating_symbol:
2884           /* Ignore the rest of the line if we don't need the input of
2885              this line.  */
2886           if (ignore_content)
2887             {
2888               lr_ignore_rest (ldfile, 0);
2889               break;
2890             }
2891
2892           if (state != 0 && state != 2)
2893             goto err_label;
2894
2895           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2896           if (arg->tok != tok_bsymbol)
2897             goto err_label;
2898           else
2899             {
2900               char *symbol = arg->val.str.startmb;
2901               size_t symbol_len = arg->val.str.lenmb;
2902               char *endsymbol = NULL;
2903               size_t endsymbol_len = 0;
2904               enum token_t ellipsis = tok_none;
2905
2906               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2907               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2908                 {
2909                   ellipsis = arg->tok;
2910
2911                   arg = lr_token (ldfile, charmap, result, repertoire,
2912                                   verbose);
2913                   if (arg->tok != tok_bsymbol)
2914                     {
2915                       free (symbol);
2916                       goto err_label;
2917                     }
2918
2919                   endsymbol = arg->val.str.startmb;
2920                   endsymbol_len = arg->val.str.lenmb;
2921
2922                   lr_ignore_rest (ldfile, 1);
2923                 }
2924               else if (arg->tok != tok_eol)
2925                 {
2926                   free (symbol);
2927                   goto err_label;
2928                 }
2929
2930               if (!ignore_content)
2931                 {
2932                   if (symbol == NULL
2933                       || (ellipsis != tok_none && endsymbol == NULL))
2934                     {
2935                       lr_error (ldfile, _("\
2936 %s: unknown character in collating symbol name"),
2937                                 "LC_COLLATE");
2938                       goto col_sym_free;
2939                     }
2940                   else if (ellipsis == tok_none)
2941                     {
2942                       /* A single symbol, no ellipsis.  */
2943                       if (check_duplicate (ldfile, collate, charmap,
2944                                            repertoire, symbol, symbol_len))
2945                         /* The name is already defined.  */
2946                         goto col_sym_free;
2947
2948                       insert_entry (&collate->sym_table, symbol, symbol_len,
2949                                     new_symbol (collate, symbol, symbol_len));
2950                     }
2951                   else if (symbol_len != endsymbol_len)
2952                     {
2953                     col_sym_inv_range:
2954                       lr_error (ldfile,
2955                                 _("invalid names for character range"));
2956                       goto col_sym_free;
2957                     }
2958                   else
2959                     {
2960                       /* Oh my, we have to handle an ellipsis.  First, as
2961                          usual, determine the common prefix and then
2962                          convert the rest into a range.  */
2963                       size_t prefixlen;
2964                       unsigned long int from;
2965                       unsigned long int to;
2966                       char *endp;
2967
2968                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2969                         if (symbol[prefixlen] != endsymbol[prefixlen])
2970                           break;
2971
2972                       /* Convert the rest into numbers.  */
2973                       symbol[symbol_len] = '\0';
2974                       from = strtoul (&symbol[prefixlen], &endp,
2975                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2976                       if (*endp != '\0')
2977                         goto col_sym_inv_range;
2978
2979                       endsymbol[symbol_len] = '\0';
2980                       to = strtoul (&endsymbol[prefixlen], &endp,
2981                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2982                       if (*endp != '\0')
2983                         goto col_sym_inv_range;
2984
2985                       if (from > to)
2986                         goto col_sym_inv_range;
2987
2988                       /* Now loop over all entries.  */
2989                       while (from <= to)
2990                         {
2991                           char *symbuf;
2992
2993                           symbuf = (char *) obstack_alloc (&collate->mempool,
2994                                                            symbol_len + 1);
2995
2996                           /* Create the name.  */
2997                           sprintf (symbuf,
2998                                    ellipsis == tok_ellipsis2
2999                                    ? "%.*s%.*lX" : "%.*s%.*lu",
3000                                    (int) prefixlen, symbol,
3001                                    (int) (symbol_len - prefixlen), from);
3002
3003                           if (check_duplicate (ldfile, collate, charmap,
3004                                                repertoire, symbuf, symbol_len))
3005                             /* The name is already defined.  */
3006                             goto col_sym_free;
3007
3008                           insert_entry (&collate->sym_table, symbuf,
3009                                         symbol_len,
3010                                         new_symbol (collate, symbuf,
3011                                                     symbol_len));
3012
3013                           /* Increment the counter.  */
3014                           ++from;
3015                         }
3016
3017                       goto col_sym_free;
3018                     }
3019                 }
3020               else
3021                 {
3022                 col_sym_free:
3023                   if (symbol != NULL)
3024                     free (symbol);
3025                   if (endsymbol != NULL)
3026                     free (endsymbol);
3027                 }
3028             }
3029           break;
3030
3031         case tok_symbol_equivalence:
3032           /* Ignore the rest of the line if we don't need the input of
3033              this line.  */
3034           if (ignore_content)
3035             {
3036               lr_ignore_rest (ldfile, 0);
3037               break;
3038             }
3039
3040           if (state != 0)
3041             goto err_label;
3042
3043           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3044           if (arg->tok != tok_bsymbol)
3045             goto err_label;
3046           else
3047             {
3048               const char *newname = arg->val.str.startmb;
3049               size_t newname_len = arg->val.str.lenmb;
3050               const char *symname;
3051               size_t symname_len;
3052               void *symval;     /* Actually struct symbol_t*  */
3053
3054               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3055               if (arg->tok != tok_bsymbol)
3056                 {
3057                   if (newname != NULL)
3058                     free ((char *) newname);
3059                   goto err_label;
3060                 }
3061
3062               symname = arg->val.str.startmb;
3063               symname_len = arg->val.str.lenmb;
3064
3065               if (newname == NULL)
3066                 {
3067                   lr_error (ldfile, _("\
3068 %s: unknown character in equivalent definition name"),
3069                             "LC_COLLATE");
3070
3071                 sym_equiv_free:
3072                   if (newname != NULL)
3073                     free ((char *) newname);
3074                   if (symname != NULL)
3075                     free ((char *) symname);
3076                   break;
3077                 }
3078               if (symname == NULL)
3079                 {
3080                   lr_error (ldfile, _("\
3081 %s: unknown character in equivalent definition value"),
3082                             "LC_COLLATE");
3083                   goto sym_equiv_free;
3084                 }
3085
3086               /* See whether the symbol name is already defined.  */
3087               if (find_entry (&collate->sym_table, symname, symname_len,
3088                               &symval) != 0)
3089                 {
3090                   lr_error (ldfile, _("\
3091 %s: unknown symbol `%s' in equivalent definition"),
3092                             "LC_COLLATE", symname);
3093                   goto sym_equiv_free;
3094                 }
3095
3096               if (insert_entry (&collate->sym_table,
3097                                 newname, newname_len, symval) < 0)
3098                 {
3099                   lr_error (ldfile, _("\
3100 error while adding equivalent collating symbol"));
3101                   goto sym_equiv_free;
3102                 }
3103
3104               free ((char *) symname);
3105             }
3106           lr_ignore_rest (ldfile, 1);
3107           break;
3108
3109         case tok_script:
3110           /* We get told about the scripts we know.  */
3111           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3112           if (arg->tok != tok_bsymbol)
3113             goto err_label;
3114           else
3115             {
3116               struct section_list *runp = collate->known_sections;
3117               char *name;
3118
3119               while (runp != NULL)
3120                 if (strncmp (runp->name, arg->val.str.startmb,
3121                              arg->val.str.lenmb) == 0
3122                     && runp->name[arg->val.str.lenmb] == '\0')
3123                   break;
3124                 else
3125                   runp = runp->def_next;
3126
3127               if (runp != NULL)
3128                 {
3129                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3130                             runp->name);
3131                   lr_ignore_rest (ldfile, 0);
3132                   break;
3133                 }
3134
3135               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3136               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3137               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3138               name[arg->val.str.lenmb] = '\0';
3139               runp->name = name;
3140
3141               runp->def_next = collate->known_sections;
3142               collate->known_sections = runp;
3143             }
3144           lr_ignore_rest (ldfile, 1);
3145           break;
3146
3147         case tok_order_start:
3148           /* Ignore the rest of the line if we don't need the input of
3149              this line.  */
3150           if (ignore_content)
3151             {
3152               lr_ignore_rest (ldfile, 0);
3153               break;
3154             }
3155
3156           if (state != 0 && state != 1 && state != 2)
3157             goto err_label;
3158           state = 1;
3159
3160           /* The 14652 draft does not specify whether all `order_start' lines
3161              must contain the same number of sort-rules, but 14651 does.  So
3162              we require this here as well.  */
3163           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3164           if (arg->tok == tok_bsymbol)
3165             {
3166               /* This better should be a section name.  */
3167               struct section_list *sp = collate->known_sections;
3168               while (sp != NULL
3169                      && (sp->name == NULL
3170                          || strncmp (sp->name, arg->val.str.startmb,
3171                                      arg->val.str.lenmb) != 0
3172                          || sp->name[arg->val.str.lenmb] != '\0'))
3173                 sp = sp->def_next;
3174
3175               if (sp == NULL)
3176                 {
3177                   lr_error (ldfile, _("\
3178 %s: unknown section name `%.*s'"),
3179                             "LC_COLLATE", (int) arg->val.str.lenmb,
3180                             arg->val.str.startmb);
3181                   /* We use the error section.  */
3182                   collate->current_section = &collate->error_section;
3183
3184                   if (collate->error_section.first == NULL)
3185                     {
3186                       /* Insert &collate->error_section at the end of
3187                          the collate->sections list.  */
3188                       if (collate->sections == NULL)
3189                         collate->sections = &collate->error_section;
3190                       else
3191                         {
3192                           sp = collate->sections;
3193                           while (sp->next != NULL)
3194                             sp = sp->next;
3195
3196                           sp->next = &collate->error_section;
3197                         }
3198                       collate->error_section.next = NULL;
3199                     }
3200                 }
3201               else
3202                 {
3203                   /* One should not be allowed to open the same
3204                      section twice.  */
3205                   if (sp->first != NULL)
3206                     lr_error (ldfile, _("\
3207 %s: multiple order definitions for section `%s'"),
3208                               "LC_COLLATE", sp->name);
3209                   else
3210                     {
3211                       /* Insert sp in the collate->sections list,
3212                          right after collate->current_section.  */
3213                       if (collate->current_section == NULL)
3214                         collate->current_section = sp;
3215                       else
3216                         {
3217                           sp->next = collate->current_section->next;
3218                           collate->current_section->next = sp;
3219                         }
3220                     }
3221
3222                   /* Next should come the end of the line or a semicolon.  */
3223                   arg = lr_token (ldfile, charmap, result, repertoire,
3224                                   verbose);
3225                   if (arg->tok == tok_eol)
3226                     {
3227                       uint32_t cnt;
3228
3229                       /* This means we have exactly one rule: `forward'.  */
3230                       if (nrules > 1)
3231                         lr_error (ldfile, _("\
3232 %s: invalid number of sorting rules"),
3233                                   "LC_COLLATE");
3234                       else
3235                         nrules = 1;
3236                       sp->rules = obstack_alloc (&collate->mempool,
3237                                                  (sizeof (enum coll_sort_rule)
3238                                                   * nrules));
3239                       for (cnt = 0; cnt < nrules; ++cnt)
3240                         sp->rules[cnt] = sort_forward;
3241
3242                       /* Next line.  */
3243                       break;
3244                     }
3245
3246                   /* Get the next token.  */
3247                   arg = lr_token (ldfile, charmap, result, repertoire,
3248                                   verbose);
3249                 }
3250             }
3251           else
3252             {
3253               /* There is no section symbol.  Therefore we use the unnamed
3254                  section.  */
3255               collate->current_section = &collate->unnamed_section;
3256
3257               if (collate->unnamed_section.first != NULL)
3258                 lr_error (ldfile, _("\
3259 %s: multiple order definitions for unnamed section"),
3260                           "LC_COLLATE");
3261               else
3262                 {
3263                   /* Insert &collate->unnamed_section at the beginning of
3264                      the collate->sections list.  */
3265                   collate->unnamed_section.next = collate->sections;
3266                   collate->sections = &collate->unnamed_section;
3267                 }
3268             }
3269
3270           /* Now read the direction names.  */
3271           read_directions (ldfile, arg, charmap, repertoire, result);
3272
3273           /* From now we need the strings untranslated.  */
3274           ldfile->translate_strings = 0;
3275           break;
3276
3277         case tok_order_end:
3278           /* Ignore the rest of the line if we don't need the input of
3279              this line.  */
3280           if (ignore_content)
3281             {
3282               lr_ignore_rest (ldfile, 0);
3283               break;
3284             }
3285
3286           if (state != 1)
3287             goto err_label;
3288
3289           /* Handle ellipsis at end of list.  */
3290           if (was_ellipsis != tok_none)
3291             {
3292               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3293                                repertoire, result);
3294               was_ellipsis = tok_none;
3295             }
3296
3297           state = 2;
3298           lr_ignore_rest (ldfile, 1);
3299           break;
3300
3301         case tok_reorder_after:
3302           /* Ignore the rest of the line if we don't need the input of
3303              this line.  */
3304           if (ignore_content)
3305             {
3306               lr_ignore_rest (ldfile, 0);
3307               break;
3308             }
3309
3310           if (state == 1)
3311             {
3312               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3313                         "LC_COLLATE");
3314               state = 2;
3315
3316               /* Handle ellipsis at end of list.  */
3317               if (was_ellipsis != tok_none)
3318                 {
3319                   handle_ellipsis (ldfile, arg->val.str.startmb,
3320                                    arg->val.str.lenmb, was_ellipsis, charmap,
3321                                    repertoire, result);
3322                   was_ellipsis = tok_none;
3323                 }
3324             }
3325           else if (state != 2 && state != 3)
3326             goto err_label;
3327           state = 3;
3328
3329           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3330           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3331             {
3332               /* Find this symbol in the sequence table.  */
3333               char ucsbuf[10];
3334               char *startmb;
3335               size_t lenmb;
3336               struct element_t *insp;
3337               int no_error = 1;
3338               void *ptr;
3339
3340               if (arg->tok == tok_bsymbol)
3341                 {
3342                   startmb = arg->val.str.startmb;
3343                   lenmb = arg->val.str.lenmb;
3344                 }
3345               else
3346                 {
3347                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3348                   startmb = ucsbuf;
3349                   lenmb = 9;
3350                 }
3351
3352               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3353                 /* Yes, the symbol exists.  Simply point the cursor
3354                    to it.  */
3355                 collate->cursor = (struct element_t *) ptr;
3356               else
3357                 {
3358                   struct symbol_t *symbp;
3359                   void *ptr;
3360
3361                   if (find_entry (&collate->sym_table, startmb, lenmb,
3362                                   &ptr) == 0)
3363                     {
3364                       symbp = ptr;
3365
3366                       if (symbp->order->last != NULL
3367                           || symbp->order->next != NULL)
3368                         collate->cursor = symbp->order;
3369                       else
3370                         {
3371                           /* This is a collating symbol but its position
3372                              is not yet defined.  */
3373                           lr_error (ldfile, _("\
3374 %s: order for collating symbol %.*s not yet defined"),
3375                                     "LC_COLLATE", (int) lenmb, startmb);
3376                           collate->cursor = NULL;
3377                           no_error = 0;
3378                         }
3379                     }
3380                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3381                                        &ptr) == 0)
3382                     {
3383                       insp = (struct element_t *) ptr;
3384
3385                       if (insp->last != NULL || insp->next != NULL)
3386                         collate->cursor = insp;
3387                       else
3388                         {
3389                           /* This is a collating element but its position
3390                              is not yet defined.  */
3391                           lr_error (ldfile, _("\
3392 %s: order for collating element %.*s not yet defined"),
3393                                     "LC_COLLATE", (int) lenmb, startmb);
3394                           collate->cursor = NULL;
3395                           no_error = 0;
3396                         }
3397                     }
3398                   else
3399                     {
3400                       /* This is bad.  The symbol after which we have to
3401                          insert does not exist.  */
3402                       lr_error (ldfile, _("\
3403 %s: cannot reorder after %.*s: symbol not known"),
3404                                 "LC_COLLATE", (int) lenmb, startmb);
3405                       collate->cursor = NULL;
3406                       no_error = 0;
3407                     }
3408                 }
3409
3410               lr_ignore_rest (ldfile, no_error);
3411             }
3412           else
3413             /* This must not happen.  */
3414             goto err_label;
3415           break;
3416
3417         case tok_reorder_end:
3418           /* Ignore the rest of the line if we don't need the input of
3419              this line.  */
3420           if (ignore_content)
3421             break;
3422
3423           if (state != 3)
3424             goto err_label;
3425           state = 4;
3426           lr_ignore_rest (ldfile, 1);
3427           break;
3428
3429         case tok_reorder_sections_after:
3430           /* Ignore the rest of the line if we don't need the input of
3431              this line.  */
3432           if (ignore_content)
3433             {
3434               lr_ignore_rest (ldfile, 0);
3435               break;
3436             }
3437
3438           if (state == 1)
3439             {
3440               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3441                         "LC_COLLATE");
3442               state = 2;
3443
3444               /* Handle ellipsis at end of list.  */
3445               if (was_ellipsis != tok_none)
3446                 {
3447                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3448                                    repertoire, result);
3449                   was_ellipsis = tok_none;
3450                 }
3451             }
3452           else if (state == 3)
3453             {
3454               WITH_CUR_LOCALE (error (0, 0, _("\
3455 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3456               state = 4;
3457             }
3458           else if (state != 2 && state != 4)
3459             goto err_label;
3460           state = 5;
3461
3462           /* Get the name of the sections we are adding after.  */
3463           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3464           if (arg->tok == tok_bsymbol)
3465             {
3466               /* Now find a section with this name.  */
3467               struct section_list *runp = collate->sections;
3468
3469               while (runp != NULL)
3470                 {
3471                   if (runp->name != NULL
3472                       && strlen (runp->name) == arg->val.str.lenmb
3473                       && memcmp (runp->name, arg->val.str.startmb,
3474                                  arg->val.str.lenmb) == 0)
3475                     break;
3476
3477                   runp = runp->next;
3478                 }
3479
3480               if (runp != NULL)
3481                 collate->current_section = runp;
3482               else
3483                 {
3484                   /* This is bad.  The section after which we have to
3485                      reorder does not exist.  Therefore we cannot
3486                      process the whole rest of this reorder
3487                      specification.  */
3488                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3489                             "LC_COLLATE", (int) arg->val.str.lenmb,
3490                             arg->val.str.startmb);
3491
3492                   do
3493                     {
3494                       lr_ignore_rest (ldfile, 0);
3495
3496                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3497                     }
3498                   while (now->tok == tok_reorder_sections_after
3499                          || now->tok == tok_reorder_sections_end
3500                          || now->tok == tok_end);
3501
3502                   /* Process the token we just saw.  */
3503                   nowtok = now->tok;
3504                   continue;
3505                 }
3506             }
3507           else
3508             /* This must not happen.  */
3509             goto err_label;
3510           break;
3511
3512         case tok_reorder_sections_end:
3513           /* Ignore the rest of the line if we don't need the input of
3514              this line.  */
3515           if (ignore_content)
3516             break;
3517
3518           if (state != 5)
3519             goto err_label;
3520           state = 6;
3521           lr_ignore_rest (ldfile, 1);
3522           break;
3523
3524         case tok_bsymbol:
3525         case tok_ucs4:
3526           /* Ignore the rest of the line if we don't need the input of
3527              this line.  */
3528           if (ignore_content)
3529             {
3530               lr_ignore_rest (ldfile, 0);
3531               break;
3532             }
3533
3534           if (state != 0 && state != 1 && state != 3 && state != 5)
3535             goto err_label;
3536
3537           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3538             goto err_label;
3539
3540           if (nowtok == tok_ucs4)
3541             {
3542               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3543               symstr = ucs4buf;
3544               symlen = 9;
3545             }
3546           else if (arg != NULL)
3547             {
3548               symstr = arg->val.str.startmb;
3549               symlen = arg->val.str.lenmb;
3550             }
3551           else
3552             {
3553               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3554                         (int) ldfile->token.val.str.lenmb,
3555                         ldfile->token.val.str.startmb);
3556               break;
3557             }
3558
3559           struct element_t *seqp;
3560           if (state == 0)
3561             {
3562               /* We are outside an `order_start' region.  This means
3563                  we must only accept definitions of values for
3564                  collation symbols since these are purely abstract
3565                  values and don't need directions associated.  */
3566               void *ptr;
3567
3568               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3569                 {
3570                   seqp = ptr;
3571
3572                   /* It's already defined.  First check whether this
3573                      is really a collating symbol.  */
3574                   if (seqp->is_character)
3575                     goto err_label;
3576
3577                   goto move_entry;
3578                 }
3579               else
3580                 {
3581                   void *result;
3582
3583                   if (find_entry (&collate->sym_table, symstr, symlen,
3584                                   &result) != 0)
3585                     /* No collating symbol, it's an error.  */
3586                     goto err_label;
3587
3588                   /* Maybe this is the first time we define a symbol
3589                      value and it is before the first actual section.  */
3590                   if (collate->sections == NULL)
3591                     collate->sections = collate->current_section =
3592                       &collate->symbol_section;
3593                 }
3594
3595               if (was_ellipsis != tok_none)
3596                 {
3597                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3598                                    charmap, repertoire, result);
3599
3600                   /* Remember that we processed the ellipsis.  */
3601                   was_ellipsis = tok_none;
3602
3603                   /* And don't add the value a second time.  */
3604                   break;
3605                 }
3606             }
3607           else if (state == 3)
3608             {
3609               /* It is possible that we already have this collation sequence.
3610                  In this case we move the entry.  */
3611               void *sym;
3612               void *ptr;
3613
3614               /* If the symbol after which we have to insert was not found
3615                  ignore all entries.  */
3616               if (collate->cursor == NULL)
3617                 {
3618                   lr_ignore_rest (ldfile, 0);
3619                   break;
3620                 }
3621
3622               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3623                 {
3624                   seqp = (struct element_t *) ptr;
3625                   goto move_entry;
3626                 }
3627
3628               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3629                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3630                 goto move_entry;
3631
3632               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3633                   && (seqp = (struct element_t *) ptr,
3634                       seqp->last != NULL || seqp->next != NULL
3635                       || (collate->start != NULL && seqp == collate->start)))
3636                 {
3637                 move_entry:
3638                   /* Remove the entry from the old position.  */
3639                   if (seqp->last == NULL)
3640                     collate->start = seqp->next;
3641                   else
3642                     seqp->last->next = seqp->next;
3643                   if (seqp->next != NULL)
3644                     seqp->next->last = seqp->last;
3645
3646                   /* We also have to check whether this entry is the
3647                      first or last of a section.  */
3648                   if (seqp->section->first == seqp)
3649                     {
3650                       if (seqp->section->first == seqp->section->last)
3651                         /* This section has no content anymore.  */
3652                         seqp->section->first = seqp->section->last = NULL;
3653                       else
3654                         seqp->section->first = seqp->next;
3655                     }
3656                   else if (seqp->section->last == seqp)
3657                     seqp->section->last = seqp->last;
3658
3659                   /* Now insert it in the new place.  */
3660                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3661                                   tok_none);
3662                   break;
3663                 }
3664
3665               /* Otherwise we just add a new entry.  */
3666             }
3667           else if (state == 5)
3668             {
3669               /* We are reordering sections.  Find the named section.  */
3670               struct section_list *runp = collate->sections;
3671               struct section_list *prevp = NULL;
3672
3673               while (runp != NULL)
3674                 {
3675                   if (runp->name != NULL
3676                       && strlen (runp->name) == symlen
3677                       && memcmp (runp->name, symstr, symlen) == 0)
3678                     break;
3679
3680                   prevp = runp;
3681                   runp = runp->next;
3682                 }
3683
3684               if (runp == NULL)
3685                 {
3686                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3687                             "LC_COLLATE", (int) symlen, symstr);
3688                   lr_ignore_rest (ldfile, 0);
3689                 }
3690               else
3691                 {
3692                   if (runp != collate->current_section)
3693                     {
3694                       /* Remove the named section from the old place and
3695                          insert it in the new one.  */
3696                       prevp->next = runp->next;
3697
3698                       runp->next = collate->current_section->next;
3699                       collate->current_section->next = runp;
3700                       collate->current_section = runp;
3701                     }
3702
3703                   /* Process the rest of the line which might change
3704                      the collation rules.  */
3705                   arg = lr_token (ldfile, charmap, result, repertoire,
3706                                   verbose);
3707                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3708                     read_directions (ldfile, arg, charmap, repertoire,
3709                                      result);
3710                 }
3711               break;
3712             }
3713           else if (was_ellipsis != tok_none)
3714             {
3715               /* Using the information in the `ellipsis_weight'
3716                  element and this and the last value we have to handle
3717                  the ellipsis now.  */
3718               assert (state == 1);
3719
3720               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3721                                repertoire, result);
3722
3723               /* Remember that we processed the ellipsis.  */
3724               was_ellipsis = tok_none;
3725
3726               /* And don't add the value a second time.  */
3727               break;
3728             }
3729
3730           /* Now insert in the new place.  */
3731           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3732           break;
3733
3734         case tok_undefined:
3735           /* Ignore the rest of the line if we don't need the input of
3736              this line.  */
3737           if (ignore_content)
3738             {
3739               lr_ignore_rest (ldfile, 0);
3740               break;
3741             }
3742
3743           if (state != 1)
3744             goto err_label;
3745
3746           if (was_ellipsis != tok_none)
3747             {
3748               lr_error (ldfile,
3749                         _("%s: cannot have `%s' as end of ellipsis range"),
3750                         "LC_COLLATE", "UNDEFINED");
3751
3752               unlink_element (collate);
3753               was_ellipsis = tok_none;
3754             }
3755
3756           /* See whether UNDEFINED already appeared somewhere.  */
3757           if (collate->undefined.next != NULL
3758               || &collate->undefined == collate->cursor)
3759             {
3760               lr_error (ldfile,
3761                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3762                         "LC_COLLATE", 9, "UNDEFINED",
3763                         collate->undefined.file,
3764                         collate->undefined.line);
3765               lr_ignore_rest (ldfile, 0);
3766             }
3767           else
3768             /* Parse the weights.  */
3769              insert_weights (ldfile, &collate->undefined, charmap,
3770                              repertoire, result, tok_none);
3771           break;
3772
3773         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3774         case tok_ellipsis3: /* absolute ellipsis */
3775         case tok_ellipsis4: /* symbolic decimal ellipsis */
3776           /* This is the symbolic (decimal or hexadecimal) or absolute
3777              ellipsis.  */
3778           if (was_ellipsis != tok_none)
3779             goto err_label;
3780
3781           if (state != 0 && state != 1 && state != 3)
3782             goto err_label;
3783
3784           was_ellipsis = nowtok;
3785
3786           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3787                           repertoire, result, nowtok);
3788           break;
3789
3790         case tok_end:
3791           /* Next we assume `LC_COLLATE'.  */
3792           if (!ignore_content)
3793             {
3794               if (state == 0)
3795                 /* We must either see a copy statement or have
3796                    ordering values.  */
3797                 lr_error (ldfile,
3798                           _("%s: empty category description not allowed"),
3799                           "LC_COLLATE");
3800               else if (state == 1)
3801                 {
3802                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3803                             "LC_COLLATE");
3804
3805                   /* Handle ellipsis at end of list.  */
3806                   if (was_ellipsis != tok_none)
3807                     {
3808                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3809                                        repertoire, result);
3810                       was_ellipsis = tok_none;
3811                     }
3812                 }
3813               else if (state == 3)
3814                 WITH_CUR_LOCALE (error (0, 0, _("\
3815 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3816               else if (state == 5)
3817                 WITH_CUR_LOCALE (error (0, 0, _("\
3818 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3819             }
3820           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3821           if (arg->tok == tok_eof)
3822             break;
3823           if (arg->tok == tok_eol)
3824             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3825           else if (arg->tok != tok_lc_collate)
3826             lr_error (ldfile, _("\
3827 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3828           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3829           return;
3830
3831         default:
3832         err_label:
3833           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3834         }
3835
3836       /* Prepare for the next round.  */
3837       now = lr_token (ldfile, charmap, result, NULL, verbose);
3838       nowtok = now->tok;
3839     }
3840
3841   /* When we come here we reached the end of the file.  */
3842   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3843 }