locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <errno.h>
  23 #include <error.h>
  24 #include <stdlib.h>
  25 #include <wchar.h>
  26 #include <stdint.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35 #include "../localeinfo.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
  41 #define obstack_chunk_alloc malloc
  42 #define obstack_chunk_free free
  43
  44 static inline void
  45 __attribute ((always_inline))
  46 obstack_int32_grow (struct obstack *obstack, int32_t data)
  47 {
  48   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  49   data = maybe_swap_uint32 (data);
  50   if (sizeof (int32_t) == sizeof (int))
  51     obstack_int_grow (obstack, data);
  52   else
  53     obstack_grow (obstack, &data, sizeof (int32_t));
  54 }
  55
  56 static inline void
  57 __attribute ((always_inline))
  58 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  59 {
  60   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  61   data = maybe_swap_uint32 (data);
  62   if (sizeof (int32_t) == sizeof (int))
  63     obstack_int_grow_fast (obstack, data);
  64   else
  65     obstack_grow (obstack, &data, sizeof (int32_t));
  66 }
  67
  68 /* Forward declaration.  */
  69 struct element_t;
  70
  71 /* Data type for list of strings.  */
  72 struct section_list
  73 {
  74   /* Successor in the known_sections list.  */
  75   struct section_list *def_next;
  76   /* Successor in the sections list.  */
  77   struct section_list *next;
  78   /* Name of the section.  */
  79   const char *name;
  80   /* First element of this section.  */
  81   struct element_t *first;
  82   /* Last element of this section.  */
  83   struct element_t *last;
  84   /* These are the rules for this section.  */
  85   enum coll_sort_rule *rules;
  86   /* Index of the rule set in the appropriate section of the output file.  */
  87   int ruleidx;
  88 };
  89
  90 struct element_t;
  91
  92 struct element_list_t
  93 {
  94   /* Number of elements.  */
  95   int cnt;
  96
  97   struct element_t **w;
  98 };
  99
 100 /* Data type for collating element.  */
 101 struct element_t
 102 {
 103   const char *name;
 104
 105   const char *mbs;
 106   size_t nmbs;
 107   const uint32_t *wcs;
 108   size_t nwcs;
 109   int *mborder;
 110   int wcorder;
 111
 112   /* The following is a bit mask which bits are set if this element is
 113      used in the appropriate level.  Interesting for the singlebyte
 114      weight computation.
 115
 116      XXX The type here restricts the number of levels to 32.  It could
 117      be changed if necessary but I doubt this is necessary.  */
 118   unsigned int used_in_level;
 119
 120   struct element_list_t *weights;
 121
 122   /* Nonzero if this is a real character definition.  */
 123   int is_character;
 124
 125   /* Order of the character in the sequence.  This information will
 126      be used in range expressions.  */
 127   int mbseqorder;
 128   int wcseqorder;
 129
 130   /* Where does the definition come from.  */
 131   const char *file;
 132   size_t line;
 133
 134   /* Which section does this belong to.  */
 135   struct section_list *section;
 136
 137   /* Predecessor and successor in the order list.  */
 138   struct element_t *last;
 139   struct element_t *next;
 140
 141   /* Next element in multibyte output list.  */
 142   struct element_t *mbnext;
 143   struct element_t *mblast;
 144
 145   /* Next element in wide character output list.  */
 146   struct element_t *wcnext;
 147   struct element_t *wclast;
 148 };
 149
 150 /* Special element value.  */
 151 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 152 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 153 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 154
 155 /* Data type for collating symbol.  */
 156 struct symbol_t
 157 {
 158   const char *name;
 159
 160   /* Point to place in the order list.  */
 161   struct element_t *order;
 162
 163   /* Where does the definition come from.  */
 164   const char *file;
 165   size_t line;
 166 };
 167
 168 /* Sparse table of struct element_t *.  */
 169 #define TABLE wchead_table
 170 #define ELEMENT struct element_t *
 171 #define DEFAULT NULL
 172 #define ITERATE
 173 #define NO_ADD_LOCALE
 174 #include "3level.h"
 175
 176 /* Sparse table of int32_t.  */
 177 #define TABLE collidx_table
 178 #define ELEMENT int32_t
 179 #define DEFAULT 0
 180 #include "3level.h"
 181
 182 /* Sparse table of uint32_t.  */
 183 #define TABLE collseq_table
 184 #define ELEMENT uint32_t
 185 #define DEFAULT ~((uint32_t) 0)
 186 #include "3level.h"
 187
 188
 189 /* Simple name list for the preprocessor.  */
 190 struct name_list
 191 {
 192   struct name_list *next;
 193   char str[0];
 194 };
 195
 196
 197 /* The real definition of the struct for the LC_COLLATE locale.  */
 198 struct locale_collate_t
 199 {
 200   int col_weight_max;
 201   int cur_weight_max;
 202
 203   /* List of known scripts.  */
 204   struct section_list *known_sections;
 205   /* List of used sections.  */
 206   struct section_list *sections;
 207   /* Current section using definition.  */
 208   struct section_list *current_section;
 209   /* There always can be an unnamed section.  */
 210   struct section_list unnamed_section;
 211   /* Flag whether the unnamed section has been defined.  */
 212   bool unnamed_section_defined;
 213   /* To make handling of errors easier we have another section.  */
 214   struct section_list error_section;
 215   /* Sometimes we are defining the values for collating symbols before
 216      the first actual section.  */
 217   struct section_list symbol_section;
 218
 219   /* Start of the order list.  */
 220   struct element_t *start;
 221
 222   /* The undefined element.  */
 223   struct element_t undefined;
 224
 225   /* This is the cursor for `reorder_after' insertions.  */
 226   struct element_t *cursor;
 227
 228   /* This value is used when handling ellipsis.  */
 229   struct element_t ellipsis_weight;
 230
 231   /* Known collating elements.  */
 232   hash_table elem_table;
 233
 234   /* Known collating symbols.  */
 235   hash_table sym_table;
 236
 237   /* Known collation sequences.  */
 238   hash_table seq_table;
 239
 240   struct obstack mempool;
 241
 242   /* The LC_COLLATE category is a bit special as it is sometimes possible
 243      that the definitions from more than one input file contains information.
 244      Therefore we keep all relevant input in a list.  */
 245   struct locale_collate_t *next;
 246
 247   /* Arrays with heads of the list for each of the leading bytes in
 248      the multibyte sequences.  */
 249   struct element_t *mbheads[256];
 250
 251   /* Arrays with heads of the list for each of the leading bytes in
 252      the multibyte sequences.  */
 253   struct wchead_table wcheads;
 254
 255   /* The arrays with the collation sequence order.  */
 256   unsigned char mbseqorder[256];
 257   struct collseq_table wcseqorder;
 258
 259   /* State of the preprocessor.  */
 260   enum
 261     {
 262       else_none = 0,
 263       else_ignore,
 264       else_seen
 265     }
 266     else_action;
 267 };
 268
 269
 270 /* We have a few global variables which are used for reading all
 271    LC_COLLATE category descriptions in all files.  */
 272 static uint32_t nrules;
 273
 274 /* List of defined preprocessor symbols.  */
 275 static struct name_list *defined;
 276
 277
 278 /* We need UTF-8 encoding of numbers.  */
 279 static inline int
 280 __attribute ((always_inline))
 281 utf8_encode (char *buf, int val)
 282 {
 283   int retval;
 284
 285   if (val < 0x80)
 286     {
 287       *buf++ = (char) val;
 288       retval = 1;
 289     }
 290   else
 291     {
 292       int step;
 293
 294       for (step = 2; step < 6; ++step)
 295         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 296           break;
 297       retval = step;
 298
 299       *buf = (unsigned char) (~0xff >> step);
 300       --step;
 301       do
 302         {
 303           buf[step] = 0x80 | (val & 0x3f);
 304           val >>= 6;
 305         }
 306       while (--step > 0);
 307       *buf |= val;
 308     }
 309
 310   return retval;
 311 }
 312
 313
 314 static struct section_list *
 315 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 316                    struct section_list *next)
 317 {
 318   struct section_list *newp;
 319
 320   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 321                                                 sizeof (*newp));
 322   newp->next = next;
 323   newp->name = string;
 324   newp->first = NULL;
 325   newp->last = NULL;
 326
 327   return newp;
 328 }
 329
 330
 331 static struct element_t *
 332 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 333              const uint32_t *wcs, const char *name, size_t namelen,
 334              int is_character)
 335 {
 336   struct element_t *newp;
 337
 338   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 339                                              sizeof (*newp));
 340   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 341                                                     name, namelen);
 342   if (mbs != NULL)
 343     {
 344       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 345       newp->nmbs = mbslen;
 346     }
 347   else
 348     {
 349       newp->mbs = NULL;
 350       newp->nmbs = 0;
 351     }
 352   if (wcs != NULL)
 353     {
 354       size_t nwcs = wcslen ((wchar_t *) wcs);
 355       uint32_t zero = 0;
 356       /* Handle <U0000> as a single character.  */
 357       if (nwcs == 0)
 358         nwcs = 1;
 359       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 360       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 361       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 362       newp->nwcs = nwcs;
 363     }
 364   else
 365     {
 366       newp->wcs = NULL;
 367       newp->nwcs = 0;
 368     }
 369   newp->mborder = NULL;
 370   newp->wcorder = 0;
 371   newp->used_in_level = 0;
 372   newp->is_character = is_character;
 373
 374   /* Will be assigned later.  XXX  */
 375   newp->mbseqorder = 0;
 376   newp->wcseqorder = 0;
 377
 378   /* Will be allocated later.  */
 379   newp->weights = NULL;
 380
 381   newp->file = NULL;
 382   newp->line = 0;
 383
 384   newp->section = collate->current_section;
 385
 386   newp->last = NULL;
 387   newp->next = NULL;
 388
 389   newp->mbnext = NULL;
 390   newp->mblast = NULL;
 391
 392   newp->wcnext = NULL;
 393   newp->wclast = NULL;
 394
 395   return newp;
 396 }
 397
 398
 399 static struct symbol_t *
 400 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 401 {
 402   struct symbol_t *newp;
 403
 404   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 405
 406   newp->name = obstack_copy0 (&collate->mempool, name, len);
 407   newp->order = NULL;
 408
 409   newp->file = NULL;
 410   newp->line = 0;
 411
 412   return newp;
 413 }
 414
 415
 416 /* Test whether this name is already defined somewhere.  */
 417 static int
 418 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 419                  const struct charmap_t *charmap,
 420                  struct repertoire_t *repertoire, const char *symbol,
 421                  size_t symbol_len)
 422 {
 423   void *ignore = NULL;
 424
 425   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 426     {
 427       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 428                 (int) symbol_len, symbol);
 429       return 1;
 430     }
 431
 432   if (repertoire != NULL
 433       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 434           == 0))
 435     {
 436       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 437                 (int) symbol_len, symbol);
 438       return 1;
 439     }
 440
 441   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 442     {
 443       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 444                 (int) symbol_len, symbol);
 445       return 1;
 446     }
 447
 448   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 449     {
 450       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 451                 (int) symbol_len, symbol);
 452       return 1;
 453     }
 454
 455   return 0;
 456 }
 457
 458
 459 /* Read the direction specification.  */
 460 static void
 461 read_directions (struct linereader *ldfile, struct token *arg,
 462                  const struct charmap_t *charmap,
 463                  struct repertoire_t *repertoire, struct localedef_t *result)
 464 {
 465   int cnt = 0;
 466   int max = nrules ?: 10;
 467   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 468   int warned = 0;
 469   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 470
 471   while (1)
 472     {
 473       int valid = 0;
 474
 475       if (arg->tok == tok_forward)
 476         {
 477           if (rules[cnt] & sort_backward)
 478             {
 479               if (! warned)
 480                 {
 481                   lr_error (ldfile, _("\
 482 %s: `forward' and `backward' are mutually excluding each other"),
 483                             "LC_COLLATE");
 484                   warned = 1;
 485                 }
 486             }
 487           else if (rules[cnt] & sort_forward)
 488             {
 489               if (! warned)
 490                 {
 491                   lr_error (ldfile, _("\
 492 %s: `%s' mentioned more than once in definition of weight %d"),
 493                             "LC_COLLATE", "forward", cnt + 1);
 494                 }
 495             }
 496           else
 497             rules[cnt] |= sort_forward;
 498
 499           valid = 1;
 500         }
 501       else if (arg->tok == tok_backward)
 502         {
 503           if (rules[cnt] & sort_forward)
 504             {
 505               if (! warned)
 506                 {
 507                   lr_error (ldfile, _("\
 508 %s: `forward' and `backward' are mutually excluding each other"),
 509                             "LC_COLLATE");
 510                   warned = 1;
 511                 }
 512             }
 513           else if (rules[cnt] & sort_backward)
 514             {
 515               if (! warned)
 516                 {
 517                   lr_error (ldfile, _("\
 518 %s: `%s' mentioned more than once in definition of weight %d"),
 519                             "LC_COLLATE", "backward", cnt + 1);
 520                 }
 521             }
 522           else
 523             rules[cnt] |= sort_backward;
 524
 525           valid = 1;
 526         }
 527       else if (arg->tok == tok_position)
 528         {
 529           if (rules[cnt] & sort_position)
 530             {
 531               if (! warned)
 532                 {
 533                   lr_error (ldfile, _("\
 534 %s: `%s' mentioned more than once in definition of weight %d"),
 535                             "LC_COLLATE", "position", cnt + 1);
 536                 }
 537             }
 538           else
 539             rules[cnt] |= sort_position;
 540
 541           valid = 1;
 542         }
 543
 544       if (valid)
 545         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 546
 547       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 548           || arg->tok == tok_semicolon)
 549         {
 550           if (! valid && ! warned)
 551             {
 552               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 553               warned = 1;
 554             }
 555
 556           /* See whether we have to increment the counter.  */
 557           if (arg->tok != tok_comma && rules[cnt] != 0)
 558             {
 559               /* Add the default `forward' if we have seen only `position'.  */
 560               if (rules[cnt] == sort_position)
 561                 rules[cnt] = sort_position | sort_forward;
 562
 563               ++cnt;
 564             }
 565
 566           if (arg->tok == tok_eof || arg->tok == tok_eol)
 567             /* End of line or file, so we exit the loop.  */
 568             break;
 569
 570           if (nrules == 0)
 571             {
 572               /* See whether we have enough room in the array.  */
 573               if (cnt == max)
 574                 {
 575                   max += 10;
 576                   rules = (enum coll_sort_rule *) xrealloc (rules,
 577                                                             max
 578                                                             * sizeof (*rules));
 579                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 580                 }
 581             }
 582           else
 583             {
 584               if (cnt == nrules)
 585                 {
 586                   /* There must not be any more rule.  */
 587                   if (! warned)
 588                     {
 589                       lr_error (ldfile, _("\
 590 %s: too many rules; first entry only had %d"),
 591                                 "LC_COLLATE", nrules);
 592                       warned = 1;
 593                     }
 594
 595                   lr_ignore_rest (ldfile, 0);
 596                   break;
 597                 }
 598             }
 599         }
 600       else
 601         {
 602           if (! warned)
 603             {
 604               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 605               warned = 1;
 606             }
 607         }
 608
 609       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 610     }
 611
 612   if (nrules == 0)
 613     {
 614       /* Now we know how many rules we have.  */
 615       nrules = cnt;
 616       rules = (enum coll_sort_rule *) xrealloc (rules,
 617                                                 nrules * sizeof (*rules));
 618     }
 619   else
 620     {
 621       if (cnt < nrules)
 622         {
 623           /* Not enough rules in this specification.  */
 624           if (! warned)
 625             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 626
 627           do
 628             rules[cnt] = sort_forward;
 629           while (++cnt < nrules);
 630         }
 631     }
 632
 633   collate->current_section->rules = rules;
 634 }
 635
 636
 637 static struct element_t *
 638 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 639               const char *str, size_t len)
 640 {
 641   void *result = NULL;
 642
 643   /* Search for the entries among the collation sequences already define.  */
 644   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 645     {
 646       /* Nope, not define yet.  So we see whether it is a
 647          collation symbol.  */
 648       void *ptr;
 649
 650       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 651         {
 652           /* It's a collation symbol.  */
 653           struct symbol_t *sym = (struct symbol_t *) ptr;
 654           result = sym->order;
 655
 656           if (result == NULL)
 657             result = sym->order = new_element (collate, NULL, 0, NULL,
 658                                                NULL, 0, 0);
 659         }
 660       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 661         {
 662           /* It's also no collation element.  So it is a character
 663              element defined later.  */
 664           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 665           /* Insert it into the sequence table.  */
 666           insert_entry (&collate->seq_table, str, len, result);
 667         }
 668     }
 669
 670   return (struct element_t *) result;
 671 }
 672
 673
 674 static void
 675 unlink_element (struct locale_collate_t *collate)
 676 {
 677   if (collate->cursor == collate->start)
 678     {
 679       assert (collate->cursor->next == NULL);
 680       assert (collate->cursor->last == NULL);
 681       collate->cursor = NULL;
 682     }
 683   else
 684     {
 685       if (collate->cursor->next != NULL)
 686         collate->cursor->next->last = collate->cursor->last;
 687       if (collate->cursor->last != NULL)
 688         collate->cursor->last->next = collate->cursor->next;
 689       collate->cursor = collate->cursor->last;
 690     }
 691 }
 692
 693
 694 static void
 695 insert_weights (struct linereader *ldfile, struct element_t *elem,
 696                 const struct charmap_t *charmap,
 697                 struct repertoire_t *repertoire, struct localedef_t *result,
 698                 enum token_t ellipsis)
 699 {
 700   int weight_cnt;
 701   struct token *arg;
 702   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 703
 704   /* Initialize all the fields.  */
 705   elem->file = ldfile->fname;
 706   elem->line = ldfile->lineno;
 707
 708   elem->last = collate->cursor;
 709   elem->next = collate->cursor ? collate->cursor->next : NULL;
 710   if (collate->cursor != NULL && collate->cursor->next != NULL)
 711     collate->cursor->next->last = elem;
 712   if (collate->cursor != NULL)
 713     collate->cursor->next = elem;
 714   if (collate->start == NULL)
 715     {
 716       assert (collate->cursor == NULL);
 717       collate->start = elem;
 718     }
 719
 720   elem->section = collate->current_section;
 721
 722   if (collate->current_section->first == NULL)
 723     collate->current_section->first = elem;
 724   if (collate->current_section->last == collate->cursor)
 725     collate->current_section->last = elem;
 726
 727   collate->cursor = elem;
 728
 729   elem->weights = (struct element_list_t *)
 730     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 731   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 732
 733   weight_cnt = 0;
 734
 735   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 736   do
 737     {
 738       if (arg->tok == tok_eof || arg->tok == tok_eol)
 739         break;
 740
 741       if (arg->tok == tok_ignore)
 742         {
 743           /* The weight for this level has to be ignored.  We use the
 744              null pointer to indicate this.  */
 745           elem->weights[weight_cnt].w = (struct element_t **)
 746             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 747           elem->weights[weight_cnt].w[0] = NULL;
 748           elem->weights[weight_cnt].cnt = 1;
 749         }
 750       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 751         {
 752           char ucs4str[10];
 753           struct element_t *val;
 754           char *symstr;
 755           size_t symlen;
 756
 757           if (arg->tok == tok_bsymbol)
 758             {
 759               symstr = arg->val.str.startmb;
 760               symlen = arg->val.str.lenmb;
 761             }
 762           else
 763             {
 764               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 765               symstr = ucs4str;
 766               symlen = 9;
 767             }
 768
 769           val = find_element (ldfile, collate, symstr, symlen);
 770           if (val == NULL)
 771             break;
 772
 773           elem->weights[weight_cnt].w = (struct element_t **)
 774             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 775           elem->weights[weight_cnt].w[0] = val;
 776           elem->weights[weight_cnt].cnt = 1;
 777         }
 778       else if (arg->tok == tok_string)
 779         {
 780           /* Split the string up in the individual characters and put
 781              the element definitions in the list.  */
 782           const char *cp = arg->val.str.startmb;
 783           int cnt = 0;
 784           struct element_t *charelem;
 785           struct element_t **weights = NULL;
 786           int max = 0;
 787
 788           if (*cp == '\0')
 789             {
 790               lr_error (ldfile, _("%s: empty weight string not allowed"),
 791                         "LC_COLLATE");
 792               lr_ignore_rest (ldfile, 0);
 793               break;
 794             }
 795
 796           do
 797             {
 798               if (*cp == '<')
 799                 {
 800                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 801                      the latter we have to unify the name.  */
 802                   const char *startp = ++cp;
 803                   size_t len;
 804
 805                   while (*cp != '>')
 806                     {
 807                       if (*cp == ldfile->escape_char)
 808                         ++cp;
 809                       if (*cp == '\0')
 810                         /* It's a syntax error.  */
 811                         goto syntax;
 812
 813                       ++cp;
 814                     }
 815
 816                   if (cp - startp == 5 && startp[0] == 'U'
 817                       && isxdigit (startp[1]) && isxdigit (startp[2])
 818                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 819                     {
 820                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 821                       char *newstr;
 822
 823                       newstr = (char *) xmalloc (10);
 824                       snprintf (newstr, 10, "U%08X", ucs4);
 825                       startp = newstr;
 826
 827                       len = 9;
 828                     }
 829                   else
 830                     len = cp - startp;
 831
 832                   charelem = find_element (ldfile, collate, startp, len);
 833                   ++cp;
 834                 }
 835               else
 836                 {
 837                   /* People really shouldn't use characters directly in
 838                      the string.  Especially since it's not really clear
 839                      what this means.  We interpret all characters in the
 840                      string as if that would be bsymbols.  Otherwise we
 841                      would have to match back to bsymbols somehow and this
 842                      is normally not what people normally expect.  */
 843                   charelem = find_element (ldfile, collate, cp++, 1);
 844                 }
 845
 846               if (charelem == NULL)
 847                 {
 848                   /* We ignore the rest of the line.  */
 849                   lr_ignore_rest (ldfile, 0);
 850                   break;
 851                 }
 852
 853               /* Add the pointer.  */
 854               if (cnt >= max)
 855                 {
 856                   struct element_t **newp;
 857                   max += 10;
 858                   newp = (struct element_t **)
 859                     alloca (max * sizeof (struct element_t *));
 860                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 861                   weights = newp;
 862                 }
 863               weights[cnt++] = charelem;
 864             }
 865           while (*cp != '\0');
 866
 867           /* Now store the information.  */
 868           elem->weights[weight_cnt].w = (struct element_t **)
 869             obstack_alloc (&collate->mempool,
 870                            cnt * sizeof (struct element_t *));
 871           memcpy (elem->weights[weight_cnt].w, weights,
 872                   cnt * sizeof (struct element_t *));
 873           elem->weights[weight_cnt].cnt = cnt;
 874
 875           /* We don't need the string anymore.  */
 876           free (arg->val.str.startmb);
 877         }
 878       else if (ellipsis != tok_none
 879                && (arg->tok == tok_ellipsis2
 880                    || arg->tok == tok_ellipsis3
 881                    || arg->tok == tok_ellipsis4))
 882         {
 883           /* It must be the same ellipsis as used in the initial column.  */
 884           if (arg->tok != ellipsis)
 885             lr_error (ldfile, _("\
 886 %s: weights must use the same ellipsis symbol as the name"),
 887                       "LC_COLLATE");
 888
 889           /* The weight for this level will depend on the element
 890              iterating over the range.  Put a placeholder.  */
 891           elem->weights[weight_cnt].w = (struct element_t **)
 892             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 893           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 894           elem->weights[weight_cnt].cnt = 1;
 895         }
 896       else
 897         {
 898         syntax:
 899           /* It's a syntax error.  */
 900           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 901           lr_ignore_rest (ldfile, 0);
 902           break;
 903         }
 904
 905       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 906       /* This better should be the end of the line or a semicolon.  */
 907       if (arg->tok == tok_semicolon)
 908         /* OK, ignore this and read the next token.  */
 909         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 910       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 911         {
 912           /* It's a syntax error.  */
 913           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 914           lr_ignore_rest (ldfile, 0);
 915           break;
 916         }
 917     }
 918   while (++weight_cnt < nrules);
 919
 920   if (weight_cnt < nrules)
 921     {
 922       /* This means the rest of the line uses the current element as
 923          the weight.  */
 924       do
 925         {
 926           elem->weights[weight_cnt].w = (struct element_t **)
 927             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 928           if (ellipsis == tok_none)
 929             elem->weights[weight_cnt].w[0] = elem;
 930           else
 931             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 932           elem->weights[weight_cnt].cnt = 1;
 933         }
 934       while (++weight_cnt < nrules);
 935     }
 936   else
 937     {
 938       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 939         {
 940           /* Too many rule values.  */
 941           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 942           lr_ignore_rest (ldfile, 0);
 943         }
 944       else
 945         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 946     }
 947 }
 948
 949
 950 static int
 951 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 952               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 953               struct localedef_t *result)
 954 {
 955   /* Find or create the collation element named by SYMSTR (SYMLEN bytes)
          and pass it to insert_weights with `tok_none' as the ellipsis kind.
          Returns 1, after reporting an error through LDFILE and skipping the
          rest of the line, when the element already has a successor or is
          the current cursor element; returns 0 otherwise.

          First find out what kind of symbol this is.  */
 956   struct charseq *seq;
 957   uint32_t wc;
 958   struct element_t *elem = NULL;
 959   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 960
 961   /* Try to find the character in the charmap.  */
 962   seq = charmap_find_value (charmap, symstr, symlen);
 963
 964   /* Determine the wide character.  The repertoire is only consulted when
          the charmap has no entry or its UCS4 value is not yet initialized;
          in the latter case the looked-up value is stored back in the
          charmap entry.  */
 965   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 966     {
 967       wc = repertoire_find_value (repertoire, symstr, symlen);
 968       if (seq != NULL)
 969         seq->ucs4 = wc;
 970     }
 971   else
 972     wc = seq->ucs4;
 973
 974   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 975     {
 976       /* It's no character, so look through the collation elements and
 977          symbol list.  */
 978       void *ptr = elem;
           /* A nonzero return from find_entry is treated as "not found" here:
              only the else branch below copies PTR back into ELEM.  */
 979       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 980         {
               /* Note: this local `result' shadows the function parameter of
                  the same name for the remainder of this block.  */
 981           void *result;
 982           struct symbol_t *sym = NULL;
 983
 984           /* It's not a collation element.  Therefore it's either a
 985              collating symbol or it's a character which is not
 986              supported by the character set.  In the latter case we
 987              simply create a dummy entry.  */
 988           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 989             {
 990               /* It's a collation symbol.  */
 991               sym = (struct symbol_t *) result;
 992
 993               elem = sym->order;
 994             }
 995
               /* Either the name is unknown or the collating symbol has no
                  element attached yet.  */
 996           if (elem == NULL)
 997             {
 998               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 999
1000               if (sym != NULL)
1001                 sym->order = elem;
1002               else
1003                 /* Enter a fake element in the sequence table.  This
1004                    won't cause anything in the output since there is
1005                    no multibyte or wide character associated with
1006                    it.  */
1007                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1008             }
1009         }
1010       else
1011         /* Copy the result back.  */
1012         elem = ptr;
1013     }
1014   else
1015     {
1016       /* Otherwise the symbol stands for a character.  */
1017       void *ptr = elem;
1018       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1019         {
1020           uint32_t wcs[2] = { wc, 0 };
1021
1022           /* We have to allocate an entry.  The byte sequence comes from
                  the charmap entry (if any) and the wide character string is
                  omitted when no valid wide character is known.  */
1023           elem = new_element (collate,
1024                               seq != NULL ? (char *) seq->bytes : NULL,
1025                               seq != NULL ? seq->nbytes : 0,
1026                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1027                               symstr, symlen, 1);
1028
1029           /* And add it to the table.  */
1030           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1031             /* This cannot happen.  */
1032             assert (! "Internal error");
1033         }
1034       else
1035         {
1036           /* Copy the result back.  */
1037           elem = ptr;
1038
1039           /* Maybe the character was used before the definition.  In this case
1040              we have to insert the byte sequences now.  */
1041           if (elem->mbs == NULL && seq != NULL)
1042             {
1043               elem->mbs = obstack_copy0 (&collate->mempool,
1044                                          seq->bytes, seq->nbytes);
1045               elem->nmbs = seq->nbytes;
1046             }
1047
               /* Likewise fill in the wide character string if it is still
                  missing and a valid wide character is known.  */
1048           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1049             {
1050               uint32_t wcs[2] = { wc, 0 };
1051
1052               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1053               elem->nwcs = 1;
1054             }
1055         }
1056     }
1057
1058   /* Test whether this element is not already in the list.  An element
          counts as already placed when it has a successor or when it is the
          element the cursor currently points to.  */
1059   if (elem->next != NULL || elem == collate->cursor)
1060     {
1061       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1062                 (int) symlen, symstr, elem->file, elem->line);
1063       lr_ignore_rest (ldfile, 0);
1064       return 1;
1065     }
1066
       /* Process the rest of the line for this element; `tok_none' says that
          the line is not an ellipsis line.  */
1067   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1068
1069   return 0;
1070 }
1071
1072
1073 static void
1074 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1075                  enum token_t ellipsis, const struct charmap_t *charmap,
1076                  struct repertoire_t *repertoire,
1077                  struct localedef_t *result)
1078 {
       /* Expand a pending ellipsis into individual elements which are linked
          into the order list between the element before the ellipsis and the
          element named by SYMSTR (SYMLEN bytes; SYMSTR may be NULL when no
          end symbol is given).

          ELLIPSIS selects the form:
          - `tok_ellipsis3': walk all byte sequences strictly between the
            multibyte sequences of the start and end element and add every
            sequence for which CHARMAP knows a symbol.
          - otherwise: the start and end names must share a prefix followed
            by a number (base 16 for `tok_ellipsis2', base 10 otherwise);
            every name with a number strictly in between is added if CHARMAP
            or REPERTOIRE knows it.

          Each added element gets its weights from `collate->ellipsis_weight';
          a level whose only weight is ELEMENT_ELLIPSIS2 is replaced by the
          element itself.  Errors are reported through LDFILE.  */
1079   struct element_t *startp;
1080   struct element_t *endp;
1081   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1082
1083   /* Unlink the entry added for the ellipsis.  */
1084   unlink_element (collate);
1085   startp = collate->cursor;
1086
1087   /* Process and add the end-entry.  */
1088   if (symstr != NULL
1089       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1090     /* Something went wrong with inserting the to-value.  This means
1091        we cannot process the ellipsis.  */
1092     return;
1093
1094   /* Reset the cursor.  */
1095   collate->cursor = startp;
1096
1097   /* Now we have to handle many different situations:
1098      - we have to distinguish between the three different ellipsis forms
1099      - whether the ellipsis is at the beginning, in the middle, or at the end.
1100   */
       /* NOTE(review): this dereferences the cursor (== STARTP) without a
          NULL check although the code below tests `startp == NULL'.  Whether
          the cursor can really be NULL here depends on unlink_element and
          insert_weights -- confirm before relying on the NULL branches.  */
1101   endp = collate->cursor->next;
1102   assert (symstr == NULL || endp != NULL);
1103
1104   /* XXX The following is probably very wrong since also collating symbols
1105      can appear in ranges.  But do we want/can refine the test for that?  */
1106 #if 0
1107   /* Both, the start and the end symbol, must stand for characters.  */
1108   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1109       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1110     {
1111       lr_error (ldfile, _("\
1112 %s: the start and the end symbol of a range must stand for characters"),
1113                 "LC_COLLATE");
1114       return;
1115     }
1116 #endif
1117
1118   if (ellipsis == tok_ellipsis3)
1119     {
1120       /* One requirement we make here: the length of the byte
1121          sequences for the first and end character must be the same.
1122          This is mainly to prevent unwanted effects and this is often
1123          not what is wanted.
              STARTP and ENDP are checked for NULL here as well, matching the
              tests further down (ENDP is NULL when no end symbol follows).  */
1124       size_t len = (startp != NULL && startp->mbs != NULL ? startp->nmbs
1125                     : (endp != NULL && endp->mbs != NULL ? endp->nmbs : 0));
1126       char mbcnt[len + 1];
1127       char mbend[len + 1];
1128
1129       /* Well, this should be caught somewhere else already.  Just to
1130          make sure.  */
1131       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1132       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1133
1134       if (startp != NULL && endp != NULL
1135           && startp->mbs != NULL && endp->mbs != NULL
1136           && startp->nmbs != endp->nmbs)
1137         {
1138           lr_error (ldfile, _("\
1139 %s: byte sequences of first and last character must have the same length"),
1140                     "LC_COLLATE");
1141           return;
1142         }
1143
1144       /* Determine whether we have to generate multibyte sequences.  */
1145       if ((startp == NULL || startp->mbs != NULL)
1146           && (endp == NULL || endp->mbs != NULL))
1147         {
1148           int cnt;
1149           int ret;
1150
1151           /* Prepare the beginning byte sequence.  This is either from the
1152              beginning byte sequence or it is all nulls if it was an
1153              initial ellipsis.  */
1154           if (startp == NULL || startp->mbs == NULL)
1155             memset (mbcnt, '\0', len);
1156           else
1157             {
1158               memcpy (mbcnt, startp->mbs, len);
1159
1160               /* And increment it so that the value is the first one we will
1161                  try to insert.  The loop carries into the preceding byte
                      whenever a byte wraps around to zero.  */
1162               for (cnt = len - 1; cnt >= 0; --cnt)
1163                 if (++mbcnt[cnt] != '\0')
1164                   break;
1165             }
1166           mbcnt[len] = '\0';
1167
1168           /* And the end sequence.  */
1169           if (endp == NULL || endp->mbs == NULL)
1170             memset (mbend, '\0', len);
1171           else
1172             memcpy (mbend, endp->mbs, len);
1173           mbend[len] = '\0';
1174
1175           /* Test whether we have a correct range.  An empty range (first
                  candidate equals the end) is silently accepted.  */
1176           ret = memcmp (mbcnt, mbend, len);
1177           if (ret >= 0)
1178             {
1179               if (ret > 0)
1180                 lr_error (ldfile, _("%s: byte sequence of first character of \
1181 range is not lower than that of the last character"), "LC_COLLATE");
1182               return;
1183             }
1184
1185           /* Generate the byte sequences data.  */
1186           while (1)
1187             {
1188               struct charseq *seq;
1189
1190               /* Quite a bit of work ahead.  We have to find the character
1191                  definition for the byte sequence and then determine the
1192                  wide character belonging to it.  */
1193               seq = charmap_find_symbol (charmap, mbcnt, len);
1194               if (seq != NULL)
1195                 {
1196                   struct element_t *elem;
1197                   size_t namelen;
1198
1199                   /* I don't think this can ever happen.  */
1200                   assert (seq->name != NULL);
1201                   namelen = strlen (seq->name);
1202
1203                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1204                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1205                                                        namelen);
1206
1207                   /* Now we are ready to insert the new value in the
1208                      sequence.  Find out whether the element is
1209                      already known.  */
1210                   void *ptr;
1211                   if (find_entry (&collate->seq_table, seq->name, namelen,
1212                                   &ptr) != 0)
1213                     {
1214                       uint32_t wcs[2] = { seq->ucs4, 0 };
1215
1216                       /* We have to allocate an entry.  */
1217                       elem = new_element (collate, mbcnt, len,
1218                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1219                                           ? NULL : wcs, seq->name,
1220                                           namelen, 1);
1221
1222                       /* And add it to the table.  */
1223                       if (insert_entry (&collate->seq_table, seq->name,
1224                                         namelen, elem) != 0)
1225                         /* This cannot happen.  */
1226                         assert (! "Internal error");
1227                     }
1228                   else
1229                     /* Copy the result.  */
1230                     elem = ptr;
1231
1232                   /* Test whether this element is not already in the list.
                          Same test as in insert_value: the element has a
                          successor or it is the cursor element itself.  */
1233                   if (elem->next != NULL
1234                       || elem == collate->cursor)
1235                     {
1236                       lr_error (ldfile, _("\
1237 order for `%.*s' already defined at %s:%Zu"),
1238                                 (int) namelen, seq->name,
1239                                 elem->file, elem->line);
1240                       goto increment;
1241                     }
1242
1243                   /* Enqueue the new element.  */
1244                   elem->last = collate->cursor;
1245                   if (collate->cursor == NULL)
1246                     elem->next = NULL;
1247                   else
1248                     {
1249                       elem->next = collate->cursor->next;
1250                       elem->last->next = elem;
1251                       if (elem->next != NULL)
1252                         elem->next->last = elem;
1253                     }
1254                   if (collate->start == NULL)
1255                     {
1256                       assert (collate->cursor == NULL);
1257                       collate->start = elem;
1258                     }
1259                   collate->cursor = elem;
1260
1261                   /* Add the weight value.  We take them from the
1262                      `ellipsis_weight' member of `collate'.  */
1263                   elem->weights = (struct element_list_t *)
1264                     obstack_alloc (&collate->mempool,
1265                                    nrules * sizeof (struct element_list_t));
1266                   for (cnt = 0; cnt < nrules; ++cnt)
1267                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1268                         && (collate->ellipsis_weight.weights[cnt].w[0]
1269                             == ELEMENT_ELLIPSIS2))
1270                       {
                             /* The ellipsis marker stands for the element
                                itself at this level.  */
1271                         elem->weights[cnt].w = (struct element_t **)
1272                           obstack_alloc (&collate->mempool,
1273                                          sizeof (struct element_t *));
1274                         elem->weights[cnt].w[0] = elem;
1275                         elem->weights[cnt].cnt = 1;
1276                       }
1277                     else
1278                       {
1279                         /* Simply use the weight from `ellipsis_weight'.  */
1280                         elem->weights[cnt].w =
1281                           collate->ellipsis_weight.weights[cnt].w;
1282                         elem->weights[cnt].cnt =
1283                           collate->ellipsis_weight.weights[cnt].cnt;
1284                       }
1285                 }
1286
1287               /* Increment for the next round.  */
1288             increment:
1289               for (cnt = len - 1; cnt >= 0; --cnt)
1290                 if (++mbcnt[cnt] != '\0')
1291                   break;
1292
1293               /* Find out whether this was all.  CNT < 0 means every byte
                      wrapped around to zero.  */
1294               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1295                 /* Yep, that's all.  */
1296                 break;
1297             }
1298         }
1299     }
1300   else
1301     {
1302       /* For symbolic range we naturally must have a beginning and an
1303          end specified by the user.  */
1304       if (startp == NULL)
1305         lr_error (ldfile, _("\
1306 %s: symbolic range ellipsis must not directly follow `order_start'"),
1307                   "LC_COLLATE");
1308       else if (endp == NULL)
1309         lr_error (ldfile, _("\
1310 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1311                   "LC_COLLATE");
1312       else
1313         {
1314           /* Determine the range.  To do so we have to determine the
1315              common prefix of the both names and then the numeric
1316              values of both ends.  */
1317           size_t lenfrom = strlen (startp->name);
1318           size_t lento = strlen (endp->name);
1319           char buf[lento + 1];
1320           int preflen = 0;
1321           long int from;
1322           long int to;
1323           char *cp;
1324           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1325
1326           if (lenfrom != lento)
1327             {
1328             invalid_range:
1329               lr_error (ldfile, _("\
1330 `%s' and `%.*s' are not valid names for symbolic range"),
1331                         startp->name, (int) lento, endp->name);
1332               return;
1333             }
1334
1335           while (startp->name[preflen] == endp->name[preflen])
1336             if (startp->name[preflen] == '\0')
1337               /* Nothing to be done.  The start and end point are identical
1338                  and while inserting the end point we have already given
1339                  the user an error message.  */
1340               return;
1341             else
1342               ++preflen;
1343
               /* Parse the numeric suffixes.  strtol reports overflow by
                  setting errno to ERANGE (returning LONG_MAX or LONG_MIN), so
                  errno alone is tested.  Trailing garbage, including a suffix
                  without any digits, leaves *CP different from NUL.  */
1344           errno = 0;
1345           from = strtol (startp->name + preflen, &cp, base);
1346           if (errno == ERANGE || *cp != '\0')
1347             goto invalid_range;
1348
1349           errno = 0;
1350           to = strtol (endp->name + preflen, &cp, base);
1351           if (errno == ERANGE || *cp != '\0')
1352             goto invalid_range;
1353
1354           /* Copy the prefix.  */
1355           memcpy (buf, startp->name, preflen);
1356
1357           /* Loop over all values.  */
1358           for (++from; from < to; ++from)
1359             {
1360               struct element_t *elem = NULL;
1361               struct charseq *seq;
1362               uint32_t wc;
1363               int cnt;
1364
1365               /* Generate the name.  The number is zero-padded to the width
                      of the original suffix so the name keeps its length.  */
1366               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1367                        (int) (lenfrom - preflen), from);
1368
1369               /* Look whether this name is already defined.  The generated
                      name in BUF is LENFROM bytes long.  */
1370               void *ptr;
1371               if (find_entry (&collate->seq_table, buf, lenfrom, &ptr) == 0)
1372                 {
1373                   /* Copy back the result.  */
1374                   elem = ptr;
1375
                       /* Same "already in the list" test as in
                          insert_value.  */
1376                   if (elem->next != NULL
1377                       || elem == collate->cursor)
1378                     {
1379                       lr_error (ldfile, _("\
1380 %s: order for `%.*s' already defined at %s:%Zu"),
1381                                 "LC_COLLATE", (int) lenfrom, buf,
1382                                 elem->file, elem->line);
1383                       continue;
1384                     }
1385
1386                   if (elem->name == NULL)
1387                     {
1388                       lr_error (ldfile, _("%s: `%s' must be a character"),
1389                                 "LC_COLLATE", buf);
1390                       continue;
1391                     }
1392                 }
1393
1394               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1395                 {
1396                   /* Search for a character of this name.  */
1397                   seq = charmap_find_value (charmap, buf, lenfrom);
1398                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1399                     {
1400                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1401
1402                       if (seq != NULL)
1403                         seq->ucs4 = wc;
1404                     }
1405                   else
1406                     wc = seq->ucs4;
1407
1408                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1409                     /* We don't know anything about a character with this
1410                        name.  XXX Should we warn?  */
1411                     continue;
1412
1413                   if (elem == NULL)
1414                     {
1415                       uint32_t wcs[2] = { wc, 0 };
1416
1417                       /* We have to allocate an entry.  */
1418                       elem = new_element (collate,
1419                                           seq != NULL
1420                                           ? (char *) seq->bytes : NULL,
1421                                           seq != NULL ? seq->nbytes : 0,
1422                                           wc == ILLEGAL_CHAR_VALUE
1423                                           ? NULL : wcs, buf, lenfrom, 1);
1424                     }
1425                   else
1426                     {
1427                       /* Update the element.  */
1428                       if (seq != NULL)
1429                         {
1430                           elem->mbs = obstack_copy0 (&collate->mempool,
1431                                                      seq->bytes, seq->nbytes);
1432                           elem->nmbs = seq->nbytes;
1433                         }
1434
1435                       if (wc != ILLEGAL_CHAR_VALUE)
1436                         {
1437                           uint32_t zero = 0;
1438
                               /* Build the zero-terminated one-character
                                  wide string on the obstack.  */
1439                           obstack_grow (&collate->mempool,
1440                                         &wc, sizeof (uint32_t));
1441                           obstack_grow (&collate->mempool,
1442                                         &zero, sizeof (uint32_t));
1443                           elem->wcs = obstack_finish (&collate->mempool);
1444                           elem->nwcs = 1;
1445                         }
1446                     }
1447
1448                   elem->file = ldfile->fname;
1449                   elem->line = ldfile->lineno;
1450                   elem->section = collate->current_section;
1451                 }
1452
1453               /* Enqueue the new element.  The cursor is known to be
                      non-NULL in this branch (STARTP was checked above).  */
1454               elem->last = collate->cursor;
1455               elem->next = collate->cursor->next;
1456               elem->last->next = elem;
1457               if (elem->next != NULL)
1458                 elem->next->last = elem;
1459               collate->cursor = elem;
1460
1461               /* Now add the weights.  They come from the `ellipsis_weight'
1462                  member of `collate'.  */
1463               elem->weights = (struct element_list_t *)
1464                 obstack_alloc (&collate->mempool,
1465                                nrules * sizeof (struct element_list_t));
1466               for (cnt = 0; cnt < nrules; ++cnt)
1467                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1468                     && (collate->ellipsis_weight.weights[cnt].w[0]
1469                         == ELEMENT_ELLIPSIS2))
1470                   {
1471                     elem->weights[cnt].w = (struct element_t **)
1472                       obstack_alloc (&collate->mempool,
1473                                      sizeof (struct element_t *));
1474                     elem->weights[cnt].w[0] = elem;
1475                     elem->weights[cnt].cnt = 1;
1476                   }
1477                 else
1478                   {
1479                     /* Simply use the weight from `ellipsis_weight'.  */
1480                     elem->weights[cnt].w =
1481                       collate->ellipsis_weight.weights[cnt].w;
1482                     elem->weights[cnt].cnt =
1483                       collate->ellipsis_weight.weights[cnt].cnt;
1484                   }
1485             }
1486         }
1487     }
1488 }
1489
1490
1491 static void
1492 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1493                  struct localedef_t *copy_locale, int ignore_content)
1494 {
       /* Make sure LOCALE has LC_COLLATE data, unless IGNORE_CONTENT is set
          or the data already exists, and reset the string-handling flags of
          LDFILE.  With a non-NULL COPY_LOCALE no new data is created; LOCALE
          is made to point at COPY_LOCALE's LC_COLLATE data instead.  */
1495   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1496     {
1497       struct locale_collate_t *collate;
1498
1499       if (copy_locale == NULL)
1500         {
               /* Fresh data: allocate the structure (presumably zero-filled,
                  as with calloc -- confirm against xcalloc).  */
1501           collate = locale->categories[LC_COLLATE].collate =
1502             (struct locale_collate_t *)
1503             xcalloc (1, sizeof (struct locale_collate_t));
1504
1505           /* Init the various data structures: the three lookup tables
                  (the second argument is presumably an initial size hint)
                  and the obstack used for all later allocations.  */
1506           init_hash (&collate->elem_table, 100);
1507           init_hash (&collate->sym_table, 100);
1508           init_hash (&collate->seq_table, 500);
1509           obstack_init (&collate->mempool);
1510
1511           collate->col_weight_max = -1;
1512         }
1513       else
1514         /* Reuse the copy_locale's data structures.  Only the pointer is
                copied, so both locales refer to the same object.  */
1515         collate = locale->categories[LC_COLLATE].collate =
1516           copy_locale->categories[LC_COLLATE].collate;
1517     }
1518
       /* These flags are cleared unconditionally, also when the content is
          ignored.  */
1519   ldfile->translate_strings = 0;
1520   ldfile->return_widestr = 0;
1521 }
1522
1523
1524 void
1525 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1526 {
1527   /* Now is the time when we can assign the individual collation
1528      values for all the symbols.  We have possibly different values
1529      for the wide- and the multibyte-character symbols.  This is done
1530      since it might make a difference in the encoding if there is in
1531      some cases no multibyte-character but there are wide-characters.
1532      (The other way around it is not important since the encoded
1533      collation value in the wide-character case is 32 bits wide and
1534      therefore requires no encoding).
1535
1536      The lowest collation value assigned is 2.  Zero is reserved for
1537      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1538      functions and 1 is used to separate the individual passes for the
1539      different rules.
1540
1541      We also have to construct a list with all the bytes/words which
1542      can come first in a sequence, followed by all the elements which
1543      also start with this byte/word.  The order is reverse which has
1544      among others the important effect that longer strings are located
1545      first in the list.  This is required for the output data since
1546      the algorithm used in `strcoll' etc depends on this.
1547
1548      The multibyte case is easy.  We simply sort into an array with
1549      256 elements.  */
1550   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1551   int mbact[nrules];
1552   int wcact;
1553   int mbseqact;
1554   int wcseqact;
1555   struct element_t *runp;
1556   int i;
1557   int need_undefined = 0;
1558   struct section_list *sect;
1559   int ruleidx;
1560   int nr_wide_elems = 0;
1561
1562   if (collate == NULL)
1563     {
1564       /* No data, no check.  */
1565       if (! be_quiet)
1566         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1567                                 "LC_COLLATE"));
1568       return;
1569     }
1570
1571   /* If this assertion is hit change the type in `element_t'.  */
1572   assert (nrules <= sizeof (runp->used_in_level) * 8);
1573
1574   /* Make sure that the `position' rule is used either in all sections
1575      or in none.  */
1576   for (i = 0; i < nrules; ++i)
1577     for (sect = collate->sections; sect != NULL; sect = sect->next)
1578       if (sect != collate->current_section
1579           && sect->rules != NULL
1580           && ((sect->rules[i] & sort_position)
1581               != (collate->current_section->rules[i] & sort_position)))
1582         {
1583           WITH_CUR_LOCALE (error (0, 0, _("\
1584 %s: `position' must be used for a specific level in all sections or none"),
1585                                   "LC_COLLATE"));
1586           break;
1587         }
1588
1589   /* Find out which elements are used at which level.  At the same
1590      time we find out whether we have any undefined symbols.  */
1591   runp = collate->start;
1592   while (runp != NULL)
1593     {
1594       if (runp->mbs != NULL)
1595         {
1596           for (i = 0; i < nrules; ++i)
1597             {
1598               int j;
1599
1600               for (j = 0; j < runp->weights[i].cnt; ++j)
1601                 /* A NULL pointer as the weight means IGNORE.  */
1602                 if (runp->weights[i].w[j] != NULL)
1603                   {
1604                     if (runp->weights[i].w[j]->weights == NULL)
1605                       {
1606                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1607                                                         runp->line,
1608                                                         _("symbol `%s' not defined"),
1609                                                         runp->weights[i].w[j]->name));
1610
1611                         need_undefined = 1;
1612                         runp->weights[i].w[j] = &collate->undefined;
1613                       }
1614                     else
1615                       /* Set the bit for the level.  */
1616                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1617                   }
1618             }
1619         }
1620
1621       /* Up to the next entry.  */
1622       runp = runp->next;
1623     }
1624
1625   /* Walk through the list of defined sequences and assign weights.  Also
1626      create the data structure which will allow generating the single byte
1627      character based tables.
1628
1629      Since at each time only the weights for each of the rules are
1630      only compared to other weights for this rule it is possible to
1631      assign more compact weight values than simply counting all
1632      weights in sequence.  We can assign weights from 3, one for each
1633      rule individually and only for those elements, which are actually
1634      used for this rule.
1635
1636      Why is this important?  It is not for the wide char table.  But
1637      it is for the singlebyte output since here larger numbers have to
1638      be encoded to make it possible to emit the value as a byte
1639      string.  */
1640   for (i = 0; i < nrules; ++i)
1641     mbact[i] = 2;
1642   wcact = 2;
1643   mbseqact = 0;
1644   wcseqact = 0;
1645   runp = collate->start;
1646   while (runp != NULL)
1647     {
1648       /* Determine the order.  */
1649       if (runp->used_in_level != 0)
1650         {
1651           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1652                                                  nrules * sizeof (int));
1653
1654           for (i = 0; i < nrules; ++i)
1655             if ((runp->used_in_level & (1 << i)) != 0)
1656               runp->mborder[i] = mbact[i]++;
1657             else
1658               runp->mborder[i] = 0;
1659         }
1660
1661       if (runp->mbs != NULL)
1662         {
1663           struct element_t **eptr;
1664           struct element_t *lastp = NULL;
1665
1666           /* Find the point where to insert in the list.  */
1667           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1668           while (*eptr != NULL)
1669             {
1670               if ((*eptr)->nmbs < runp->nmbs)
1671                 break;
1672
1673               if ((*eptr)->nmbs == runp->nmbs)
1674                 {
1675                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1676
1677                   if (c == 0)
1678                     {
1679                       /* This should not happen.  It means that we have
1680                          two symbols with the same byte sequence.  It is
1681                          of course an error.  */
1682                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1683                                                       (*eptr)->line,
1684                                                       _("\
1685 symbol `%s' has the same encoding as"), (*eptr)->name);
1686                                        error_at_line (0, 0, runp->file,
1687                                                       runp->line,
1688                                                       _("symbol `%s'"),
1689                                                       runp->name));
1690                       goto dont_insert;
1691                     }
1692                   else if (c < 0)
1693                     /* Insert it here.  */
1694                     break;
1695                 }
1696
1697               /* To the next entry.  */
1698               lastp = *eptr;
1699               eptr = &(*eptr)->mbnext;
1700             }
1701
1702           /* Set the pointers.  */
1703           runp->mbnext = *eptr;
1704           runp->mblast = lastp;
1705           if (*eptr != NULL)
1706             (*eptr)->mblast = runp;
1707           *eptr = runp;
1708         dont_insert:
1709           ;
1710         }
1711
1712       if (runp->used_in_level)
1713         {
1714           runp->wcorder = wcact++;
1715
1716           /* We take the opportunity to count the elements which have
1717              wide characters.  */
1718           ++nr_wide_elems;
1719         }
1720
1721       if (runp->is_character)
1722         {
1723           if (runp->nmbs == 1)
1724             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1725
1726           runp->wcseqorder = wcseqact++;
1727         }
1728       else if (runp->mbs != NULL && runp->weights != NULL)
1729         /* This is for collation elements.  */
1730         runp->wcseqorder = wcseqact++;
1731
1732       /* Up to the next entry.  */
1733       runp = runp->next;
1734     }
1735
1736   /* Find out whether any of the `mbheads' entries is unset.  In this
1737      case we use the UNDEFINED entry.  */
1738   for (i = 1; i < 256; ++i)
1739     if (collate->mbheads[i] == NULL)
1740       {
1741         need_undefined = 1;
1742         collate->mbheads[i] = &collate->undefined;
1743       }
1744
1745   /* Now to the wide character case.  */
1746   collate->wcheads.p = 6;
1747   collate->wcheads.q = 10;
1748   wchead_table_init (&collate->wcheads);
1749
1750   collate->wcseqorder.p = 6;
1751   collate->wcseqorder.q = 10;
1752   collseq_table_init (&collate->wcseqorder);
1753
1754   /* Start adding.  */
1755   runp = collate->start;
1756   while (runp != NULL)
1757     {
1758       if (runp->wcs != NULL)
1759         {
1760           struct element_t *e;
1761           struct element_t **eptr;
1762           struct element_t *lastp;
1763
1764           /* Insert the collation sequence value.  */
1765           if (runp->is_character)
1766             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1767                                runp->wcseqorder);
1768
1769           /* Find the point where to insert in the list.  */
1770           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1771           eptr = &e;
1772           lastp = NULL;
1773           while (*eptr != NULL)
1774             {
1775               if ((*eptr)->nwcs < runp->nwcs)
1776                 break;
1777
1778               if ((*eptr)->nwcs == runp->nwcs)
1779                 {
1780                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1781                                    (wchar_t *) runp->wcs, runp->nwcs);
1782
1783                   if (c == 0)
1784                     {
1785                       /* This should not happen.  It means that we have
1786                          two symbols with the same byte sequence.  It is
1787                          of course an error.  */
1788                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1789                                                       (*eptr)->line,
1790                                                       _("\
1791 symbol `%s' has the same encoding as"), (*eptr)->name);
1792                                        error_at_line (0, 0, runp->file,
1793                                                       runp->line,
1794                                                       _("symbol `%s'"),
1795                                                       runp->name));
1796                       goto dont_insertwc;
1797                     }
1798                   else if (c < 0)
1799                     /* Insert it here.  */
1800                     break;
1801                 }
1802
1803               /* To the next entry.  */
1804               lastp = *eptr;
1805               eptr = &(*eptr)->wcnext;
1806             }
1807
1808           /* Set the pointers.  */
1809           runp->wcnext = *eptr;
1810           runp->wclast = lastp;
1811           if (*eptr != NULL)
1812             (*eptr)->wclast = runp;
1813           *eptr = runp;
1814           if (eptr == &e)
1815             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1816         dont_insertwc:
1817           ;
1818         }
1819
1820       /* Up to the next entry.  */
1821       runp = runp->next;
1822     }
1823
1824   /* Now determine whether the UNDEFINED entry is needed and if yes,
1825      whether it was defined.  */
1826   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1827   if (collate->undefined.file == NULL)
1828     {
1829       if (need_undefined)
1830         {
1831           /* This seems not to be enforced by recent standards.  Don't
1832              emit an error, simply append UNDEFINED at the end.  */
1833           if (0)
1834             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1835
1836           /* Add UNDEFINED at the end.  */
1837           collate->undefined.mborder =
1838             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1839
1840           for (i = 0; i < nrules; ++i)
1841             collate->undefined.mborder[i] = mbact[i]++;
1842         }
1843
1844       /* In any case we will need the definition for the wide character
1845          case.  But we will not complain that it is missing since the
1846          specification strangely enough does not seem to account for
1847          this.  */
1848       collate->undefined.wcorder = wcact++;
1849     }
1850
1851   /* Finally, try to unify the rules for the sections.  Whenever the rules
1852      for a section are the same as those for another section give the
1853      ruleset the same index.  Since there are never many section we can
1854      use an O(n^2) algorithm here.  */
1855   sect = collate->sections;
1856   while (sect != NULL && sect->rules == NULL)
1857     sect = sect->next;
1858
1859   /* Bail out if we have no sections because of earlier errors.  */
1860   if (sect == NULL)
1861     {
1862       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1863                               _("too many errors; giving up")));
1864       return;
1865     }
1866
1867   ruleidx = 0;
1868   do
1869     {
1870       struct section_list *osect = collate->sections;
1871
1872       while (osect != sect)
1873         if (osect->rules != NULL
1874             && memcmp (osect->rules, sect->rules,
1875                        nrules * sizeof (osect->rules[0])) == 0)
1876           break;
1877         else
1878           osect = osect->next;
1879
1880       if (osect == sect)
1881         sect->ruleidx = ruleidx++;
1882       else
1883         sect->ruleidx = osect->ruleidx;
1884
1885       /* Next section.  */
1886       do
1887         sect = sect->next;
1888       while (sect != NULL && sect->rules == NULL);
1889     }
1890   while (sect != NULL);
1891   /* We are currently not prepared for more than 128 rulesets.  But this
1892      should never really be a problem.  */
1893   assert (ruleidx <= 128);
1894 }
1895
1896
1897 static int32_t
1898 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1899                struct element_t *elem)
1900 {
1901   size_t cnt;
1902   int32_t retval;
1903
1904   /* Optimize the use of UNDEFINED.  */
1905   if (elem == &collate->undefined)
1906     /* The weights are already inserted.  */
1907     return 0;
1908
1909   /* This byte can start exactly one collation element and this is
1910      a single byte.  We can directly give the index to the weights.  */
1911   retval = obstack_object_size (pool);
1912
1913   /* Construct the weight.  */
1914   for (cnt = 0; cnt < nrules; ++cnt)
1915     {
1916       char buf[elem->weights[cnt].cnt * 7];
1917       int len = 0;
1918       int i;
1919
1920       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1921         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1922         if (elem->weights[cnt].w[i] != NULL)
1923           len += utf8_encode (&buf[len],
1924                               elem->weights[cnt].w[i]->mborder[cnt]);
1925
1926       /* And add the buffer content.  */
1927       obstack_1grow (pool, len);
1928       obstack_grow (pool, buf, len);
1929     }
1930
1931   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1932 }
1933
1934
1935 static int32_t
1936 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1937                  struct element_t *elem)
1938 {
1939   size_t cnt;
1940   int32_t retval;
1941
1942   /* Optimize the use of UNDEFINED.  */
1943   if (elem == &collate->undefined)
1944     /* The weights are already inserted.  */
1945     return 0;
1946
1947   /* This byte can start exactly one collation element and this is
1948      a single byte.  We can directly give the index to the weights.  */
1949   retval = obstack_object_size (pool) / sizeof (int32_t);
1950
1951   /* Construct the weight.  */
1952   for (cnt = 0; cnt < nrules; ++cnt)
1953     {
1954       int32_t buf[elem->weights[cnt].cnt];
1955       int i;
1956       int32_t j;
1957
1958       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1959         if (elem->weights[cnt].w[i] != NULL)
1960           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1961
1962       /* And add the buffer content.  */
1963       obstack_int32_grow (pool, j);
1964
1965       obstack_grow (pool, buf, j * sizeof (int32_t));
1966       maybe_swap_uint32_obstack (pool, j);
1967     }
1968
1969   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1970 }
1971
/* State shared between collate_output and add_to_tablewc, the callback
   collate_output passes to wchead_table_iterate; the callback receives
   only a character and an element, so everything else it needs is
   found here.  collate_output fills in all members before iterating
   and clears the structure afterwards.

   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool to which output_weightwc appends the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the entries for wide character sequences.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed runs of
     consecutive sequences.  */
  struct obstack *indpool;
  /* The collation data currently being written.  */
  struct locale_collate_t *collate;
  /* The lookup table add_to_tablewc fills using collidx_table_add.  */
  struct collidx_table *tablewc;
} atwc;
1981
1982 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1983
1984 static void
1985 add_to_tablewc (uint32_t ch, struct element_t *runp)
1986 {
1987   if (runp->wcnext == NULL && runp->nwcs == 1)
1988     {
1989       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1990                                            runp);
1991       collidx_table_add (atwc.tablewc, ch, weigthidx);
1992     }
1993   else
1994     {
1995       /* As for the singlebyte table, we recognize sequences and
1996          compress them.  */
1997
1998       collidx_table_add (atwc.tablewc, ch,
1999                          -(obstack_object_size (atwc.extrapool)
2000                          / sizeof (uint32_t)));
2001
2002       do
2003         {
2004           /* Store the current index in the weight table.  We know that
2005              the current position in the `extrapool' is aligned on a
2006              32-bit address.  */
2007           int32_t weightidx;
2008           int added;
2009
2010           /* Find out wether this is a single entry or we have more than
2011              one consecutive entry.  */
2012           if (runp->wcnext != NULL
2013               && runp->nwcs == runp->wcnext->nwcs
2014               && wmemcmp ((wchar_t *) runp->wcs,
2015                           (wchar_t *)runp->wcnext->wcs,
2016                           runp->nwcs - 1) == 0
2017               && (runp->wcs[runp->nwcs - 1]
2018                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2019             {
2020               int i;
2021               struct element_t *series_startp = runp;
2022               struct element_t *curp;
2023
2024               /* Now add first the initial byte sequence.  */
2025               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2026               if (sizeof (int32_t) == sizeof (int))
2027                 obstack_make_room (atwc.extrapool, added);
2028
2029               /* More than one consecutive entry.  We mark this by having
2030                  a negative index into the indirect table.  */
2031               obstack_int32_grow_fast (atwc.extrapool,
2032                                        -(obstack_object_size (atwc.indpool)
2033                                          / sizeof (int32_t)));
2034               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2035
2036               do
2037                 runp = runp->wcnext;
2038               while (runp->wcnext != NULL
2039                      && runp->nwcs == runp->wcnext->nwcs
2040                      && wmemcmp ((wchar_t *) runp->wcs,
2041                                  (wchar_t *)runp->wcnext->wcs,
2042                                  runp->nwcs - 1) == 0
2043                      && (runp->wcs[runp->nwcs - 1]
2044                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2045
2046               /* Now walk backward from here to the beginning.  */
2047               curp = runp;
2048
2049               for (i = 1; i < runp->nwcs; ++i)
2050                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2051
2052               /* Now find the end of the consecutive sequence and
2053                  add all the indeces in the indirect pool.  */
2054               do
2055                 {
2056                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2057                                                curp);
2058                   obstack_int32_grow (atwc.indpool, weightidx);
2059
2060                   curp = curp->wclast;
2061                 }
2062               while (curp != series_startp);
2063
2064               /* Add the final weight.  */
2065               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2066                                            curp);
2067               obstack_int32_grow (atwc.indpool, weightidx);
2068
2069               /* And add the end byte sequence.  Without length this
2070                  time.  */
2071               for (i = 1; i < curp->nwcs; ++i)
2072                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2073             }
2074           else
2075             {
2076               /* A single entry.  Simply add the index and the length and
2077                  string (except for the first character which is already
2078                  tested for).  */
2079               int i;
2080
2081               /* Output the weight info.  */
2082               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2083                                            runp);
2084
2085               assert (runp->nwcs > 0);
2086               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2087               if (sizeof (int) == sizeof (int32_t))
2088                 obstack_make_room (atwc.extrapool, added);
2089
2090               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2091               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2092               for (i = 1; i < runp->nwcs; ++i)
2093                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2094             }
2095
2096           /* Next entry.  */
2097           runp = runp->wcnext;
2098         }
2099       while (runp != NULL);
2100     }
2101 }
2102
/* Write the LC_COLLATE data of LOCALE to OUTPUT_PATH.

   The items are added to the output in this order: the number of
   rules; the rule set table; the 8-bit lookup table followed by its
   weight, extra, and indirect pools; three empty items; the wide
   character lookup table followed by its weight, extra, and indirect
   pools; the size of the collation symbol hash table, the hash table
   itself, and the pool holding its entries; the multibyte and the
   wide character collation sequence tables; the code set name of
   CHARMAP and the encoding type derived from it.

   If LOCALE has no collation data the number of rules is followed
   only by empty items, except for a zero word for the hash table size
   and __cet_other for the encoding type.  */
void
collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
                const char *output_path)
{
  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
  struct locale_file file;
  size_t ch;
  int32_t tablemb[256];
  struct obstack weightpool;
  struct obstack extrapool;
  struct obstack indirectpool;
  struct section_list *sect;
  struct collidx_table tablewc;
  uint32_t elem_size;
  uint32_t *elem_table;
  int i;
  struct element_t *runp;

  init_locale_data (&file, nelems);
  add_locale_uint32 (&file, nrules);

  /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
  if (collate == NULL)
    {
      size_t idx;
      /* Item 0 (the number of rules) has been added above already.  */
      for (idx = 1; idx < nelems; idx++)
        {
          /* The words have to be handled specially.  */
          if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
            add_locale_uint32 (&file, 0);
          else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE))
            add_locale_uint32 (&file, __cet_other);
          else
            add_locale_empty (&file);
        }
      write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
      return;
    }

  obstack_init (&weightpool);
  obstack_init (&extrapool);
  obstack_init (&indirectpool);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Prepare the ruleset table.  Every distinct rule set contributes
     NRULES bytes; I counts the rule sets written so far, so a section
     is written only when its index is the next one expected.  */
  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
    if (sect->rules != NULL && sect->ruleidx == i)
      {
        int j;

        obstack_make_room (&weightpool, nrules);

        for (j = 0; j < nrules; ++j)
          obstack_1grow_fast (&weightpool, sect->rules[j]);
        ++i;
      }
  /* And align the output.  */
  i = (nrules * i) % LOCFILE_ALIGN;
  if (i > 0)
    do
      obstack_1grow (&weightpool, '\0');
    while (++i < LOCFILE_ALIGN);

  add_locale_raw_obstack (&file, &weightpool);

  /* Generate the 8-bit table.  Walk through the lists of sequences
     starting with the same byte and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same byte we have to use extra indirection.

     First add a record for the NUL byte.  This entry will never be used
     so it does not matter.  */
  tablemb[0] = 0;

  /* Now insert the `UNDEFINED' value if it is used.  Since this value
     will probably be used more than once it is good to store the
     weights only once.
     NOTE(review): output_weight returns zero without appending
     anything when it is handed the UNDEFINED element, so this call
     does not add any data to the pool.  */
  if (collate->undefined.used_in_level != 0)
    output_weight (&weightpool, collate, &collate->undefined);

  for (ch = 1; ch < 256; ++ch)
    if (collate->mbheads[ch]->mbnext == NULL
        && collate->mbheads[ch]->nmbs <= 1)
      {
        /* Exactly one element of at most one byte starts with this
           byte: the table entry is the weight index itself.  */
        tablemb[ch] = output_weight (&weightpool, collate,
                                     collate->mbheads[ch]);
      }
    else
      {
        /* The entries in the list are sorted by length and then
           alphabetically.  This is the order in which we will add the
           elements to the collation table.  This allows simply walking
           the table in sequence and stopping at the first matching
           entry.  Since the longer sequences are coming first in the
           list they have the possibility to match first, just as it
           has to be.  In the worst case we are walking to the end of
           the list where we put, if no singlebyte sequence is defined
           in the locale definition, the weights for UNDEFINED.

           To reduce the length of the search list we compress them a bit.
           This happens by collecting sequences of consecutive byte
           sequences in one entry (having a begin and an end byte
           sequence) and adding only one index into the weight table.
           We can find the consecutive entries since they are also
           consecutive in the list.  */
        struct element_t *runp = collate->mbheads[ch];
        struct element_t *lastp;

        assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

        /* A negative table entry is the negated byte offset of the
           list in the extra pool.  */
        tablemb[ch] = -obstack_object_size (&extrapool);

        do
          {
            /* Store the current index in the weight table.  We know that
               the current position in the `extrapool' is aligned on a
               32-bit address.  */
            int32_t weightidx;
            int added;

            /* Find out whether this is a single entry or we have more
               than one consecutive entry.  */
            if (runp->mbnext != NULL
                && runp->nmbs == runp->mbnext->nmbs
                && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
                && (runp->mbs[runp->nmbs - 1]
                    == runp->mbnext->mbs[runp->nmbs - 1] + 1))
              {
                int i;
                struct element_t *series_startp = runp;
                struct element_t *curp;

                /* Compute how much space we will need: the index, the
                   length byte, and two byte sequences without their
                   first byte.  */
                added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
                                          + 2 * (runp->nmbs - 1));
                assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
                obstack_make_room (&extrapool, added);

                /* More than one consecutive entry.  We mark this by having
                   a negative index into the indirect table.  */
                obstack_int32_grow_fast (&extrapool,
                                         -(obstack_object_size (&indirectpool)
                                           / sizeof (int32_t)));

                /* Now search first the end of the series.  */
                do
                  runp = runp->mbnext;
                while (runp->mbnext != NULL
                       && runp->nmbs == runp->mbnext->nmbs
                       && memcmp (runp->mbs, runp->mbnext->mbs,
                                  runp->nmbs - 1) == 0
                       && (runp->mbs[runp->nmbs - 1]
                           == runp->mbnext->mbs[runp->nmbs - 1] + 1));

                /* Now walk backward from here to the beginning.  */
                curp = runp;

                /* The length byte and the byte sequence of the element
                   at the far end of the series come first.  */
                assert (runp->nmbs <= 256);
                obstack_1grow_fast (&extrapool, curp->nmbs - 1);
                for (i = 1; i < curp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, curp->mbs[i]);

                /* Now find the end of the consecutive sequence and
                   add all the indices in the indirect pool.  */
                do
                  {
                    weightidx = output_weight (&weightpool, collate, curp);
                    obstack_int32_grow (&indirectpool, weightidx);

                    curp = curp->mblast;
                  }
                while (curp != series_startp);

                /* Add the final weight.  */
                weightidx = output_weight (&weightpool, collate, curp);
                obstack_int32_grow (&indirectpool, weightidx);

                /* And add the end byte sequence.  Without length this
                   time.  */
                for (i = 1; i < curp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, curp->mbs[i]);
              }
            else
              {
                /* A single entry.  Simply add the index and the length and
                   string (except for the first character which is already
                   tested for).  */
                int i;

                /* Output the weight info.  */
                weightidx = output_weight (&weightpool, collate, runp);

                added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
                                          + runp->nmbs - 1);
                assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
                obstack_make_room (&extrapool, added);

                obstack_int32_grow_fast (&extrapool, weightidx);
                assert (runp->nmbs <= 256);
                obstack_1grow_fast (&extrapool, runp->nmbs - 1);

                for (i = 1; i < runp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, runp->mbs[i]);
              }

            /* Add alignment bytes if necessary.  The room reserved
               above was rounded up with LOCFILE_ALIGN_UP, so the fast
               variant can be used.  */
            while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
              obstack_1grow_fast (&extrapool, '\0');

            /* Next entry.  */
            lastp = runp;
            runp = runp->mbnext;
          }
        while (runp != NULL);

        assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

        /* If the final entry in the list is not a single character we
           add an UNDEFINED entry here.  */
        if (lastp->nmbs != 1)
          {
            int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
            obstack_make_room (&extrapool, added);

            obstack_int32_grow_fast (&extrapool, 0);
            /* XXX What rule? We just pick the first.  */
            obstack_1grow_fast (&extrapool, 0);
            /* Length is zero.  */
            obstack_1grow_fast (&extrapool, 0);

            /* Add alignment bytes if necessary.  */
            while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
              obstack_1grow_fast (&extrapool, '\0');
          }
      }

  /* Add padding to the tables if necessary.  */
  while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
    obstack_1grow (&weightpool, 0);

  /* Now add the four tables.  */
  add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Now the same for the wide character table.  We need to store some
     more information here.  */
  add_locale_empty (&file);
  add_locale_empty (&file);
  add_locale_empty (&file);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Now insert the `UNDEFINED' value if it is used.  Since this value
     will probably be used more than once it is good to store the
     weights only once.
     NOTE(review): output_weightwc returns zero without appending
     anything when it is handed the UNDEFINED element, so this only
     verifies that zero is what UNDEFINED is referred to with.  */
  if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
    abort ();

  /* Generate the table.  Walk through the lists of sequences starting
     with the same wide character and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same byte we have to use extra indirection.  */
  tablewc.p = 6;
  tablewc.q = 10;
  collidx_table_init (&tablewc);

  /* The callback of wchead_table_iterate gets no user data, so the
     state add_to_tablewc needs is passed in the file-level `atwc'.  */
  atwc.weightpool = &weightpool;
  atwc.extrapool = &extrapool;
  atwc.indpool = &indirectpool;
  atwc.collate = collate;
  atwc.tablewc = &tablewc;

  wchead_table_iterate (&collate->wcheads, add_to_tablewc);

  memset (&atwc, 0, sizeof (atwc));

  /* Now add the four tables.  */
  add_locale_collidx_table (&file, &tablewc);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Finally write the table with collation element names out.  It is
     a hash table with a simple function which gets the name of the
     character as the input.  One character might have many names.  The
     value associated with the name is an index into the weight table
     where we are then interested in the first-level weight value.

     To determine how large the table should be we are counting the
     elements we have to put in.  Since we are using internal chaining
     using a secondary hash function we have to make the table a bit
     larger to avoid extremely long search times.  We can achieve
     good results with a 40% larger table than there are entries.  */
  elem_size = 0;
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
        /* Yep, the element really counts.  */
        ++elem_size;

      runp = runp->next;
    }
  /* Add 40% and find the next prime number.  */
  elem_size = next_prime (elem_size * 1.4);

  /* Allocate the table.  Each entry consists of two words: the hash
     value and an index in a secondary table which provides the index
     into the weight table and the string itself (so that a match can
     be determined).  */
  elem_table = (uint32_t *) obstack_alloc (&extrapool,
                                           elem_size * 2 * sizeof (uint32_t));
  memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));

  /* Now add the elements.  The selection is the same as in the
     counting loop above.  */
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
        {
          /* Compute the hash value of the name.  */
          uint32_t namelen = strlen (runp->name);
          uint32_t hash = elem_hash (runp->name, namelen);
          size_t idx = hash % elem_size;
#ifndef NDEBUG
          size_t start_idx = idx;
#endif

          /* A zero in the first word of a slot marks it as free.  */
          if (elem_table[idx * 2] != 0)
            {
              /* The spot is already taken.  Try iterating using the value
                 from the secondary hashing function.  */
              size_t iter = hash % (elem_size - 2) + 1;

              do
                {
                  idx += iter;
                  if (idx >= elem_size)
                    idx -= elem_size;
                  assert (idx != start_idx);
                }
              while (elem_table[idx * 2] != 0);
            }
          /* This is the spot where we will insert the value.  */
          elem_table[idx * 2] = hash;
          elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);

          /* The string itself including length.  */
          obstack_1grow (&extrapool, namelen);
          obstack_grow (&extrapool, runp->name, namelen);

          /* And the multibyte representation.  */
          obstack_1grow (&extrapool, runp->nmbs);
          obstack_grow (&extrapool, runp->mbs, runp->nmbs);

          /* And align again to 32 bits.  Between one and three NUL
             bytes are needed; the string literal provides three
             (two written out plus its terminator).  */
          if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
            obstack_grow (&extrapool, "\0\0",
                          (sizeof (int32_t)
                           - ((1 + namelen + 1 + runp->nmbs)
                              % sizeof (int32_t))));

          /* Now some 32-bit values: multibyte collation sequence,
             wide char string (including length), and wide char
             collation sequence.  */
          obstack_int32_grow (&extrapool, runp->mbseqorder);

          obstack_int32_grow (&extrapool, runp->nwcs);
          obstack_grow (&extrapool, runp->wcs,
                        runp->nwcs * sizeof (uint32_t));
          maybe_swap_uint32_obstack (&extrapool, runp->nwcs);

          obstack_int32_grow (&extrapool, runp->wcseqorder);
        }

      runp = runp->next;
    }

  /* Prepare to write out this data.  */
  add_locale_uint32 (&file, elem_size);
  add_locale_uint32_array (&file, elem_table, 2 * elem_size);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_data (&file, collate->mbseqorder, 256);
  add_locale_collseq_table (&file, &collate->wcseqorder);
  add_locale_string (&file, charmap->code_set_name);
  /* The encoding type is derived from the charmap: UTF-8 is recognized
     by name, any other charmap with at most one byte per character is
     8-bit, everything else is "other".  */
  if (strcmp (charmap->code_set_name, "UTF-8") == 0)
    add_locale_uint32 (&file, __cet_utf8);
  else if (charmap->mb_cur_max == 1)
    add_locale_uint32 (&file, __cet_8bit);
  else
    add_locale_uint32 (&file, __cet_other);
  write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);

  obstack_free (&weightpool, NULL);
  obstack_free (&extrapool, NULL);
  obstack_free (&indirectpool, NULL);
}
2511
2512
2513 static enum token_t
2514 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2515          const struct charmap_t *charmap, int to_endif)
2516 {
2517   while (1)
2518     {
2519       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2520       enum token_t nowtok = now->tok;
2521
2522       if (nowtok == tok_eof || nowtok == tok_end)
2523         return nowtok;
2524
2525       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2526         {
2527           lr_error (ldfile, _("%s: nested conditionals not supported"),
2528                     "LC_COLLATE");
2529           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2530           if (nowtok == tok_eof || nowtok == tok_end)
2531             return nowtok;
2532         }
2533       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2534         {
2535           lr_ignore_rest (ldfile, 1);
2536           return nowtok;
2537         }
2538       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2539         {
2540           /* Do not read the rest of the line.  */
2541           return nowtok;
2542         }
2543       else if (nowtok == tok_else)
2544         {
2545           lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2546         }
2547
2548       lr_ignore_rest (ldfile, 0);
2549     }
2550 }
2551
2552
2553 void
2554 collate_read (struct linereader *ldfile, struct localedef_t *result,
2555               const struct charmap_t *charmap, const char *repertoire_name,
2556               int ignore_content)
2557 {
2558   struct repertoire_t *repertoire = NULL;
2559   struct locale_collate_t *collate;
2560   struct token *now;
2561   struct token *arg = NULL;
2562   enum token_t nowtok;
2563   enum token_t was_ellipsis = tok_none;
2564   struct localedef_t *copy_locale = NULL;
2565   /* Parsing state:
2566      0 - start
2567      1 - between `order-start' and `order-end'
2568      2 - after `order-end'
2569      3 - after `reorder-after', waiting for `reorder-end'
2570      4 - after `reorder-end'
2571      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572      6 - after `reorder-sections-end'
2573   */
2574   int state = 0;
2575
2576   /* Get the repertoire we have to use.  */
2577   if (repertoire_name != NULL)
2578     repertoire = repertoire_read (repertoire_name);
2579
2580   /* The rest of the line containing `LC_COLLATE' must be free.  */
2581   lr_ignore_rest (ldfile, 1);
2582
2583   while (1)
2584     {
2585       do
2586         {
2587           now = lr_token (ldfile, charmap, result, NULL, verbose);
2588           nowtok = now->tok;
2589         }
2590       while (nowtok == tok_eol);
2591
2592       if (nowtok != tok_define)
2593         break;
2594
2595       if (ignore_content)
2596         lr_ignore_rest (ldfile, 0);
2597       else
2598         {
2599           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2600           if (arg->tok != tok_ident)
2601             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2602           else
2603             {
2604               /* Simply add the new symbol.  */
2605               struct name_list *newsym = xmalloc (sizeof (*newsym)
2606                                                   + arg->val.str.lenmb + 1);
2607               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2608               newsym->str[arg->val.str.lenmb] = '\0';
2609               newsym->next = defined;
2610               defined = newsym;
2611
2612               lr_ignore_rest (ldfile, 1);
2613             }
2614         }
2615     }
2616
2617   if (nowtok == tok_copy)
2618     {
2619       now = lr_token (ldfile, charmap, result, NULL, verbose);
2620       if (now->tok != tok_string)
2621         {
2622           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2623
2624         skip_category:
2625           do
2626             now = lr_token (ldfile, charmap, result, NULL, verbose);
2627           while (now->tok != tok_eof && now->tok != tok_end);
2628
2629           if (now->tok != tok_eof
2630               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2631                   now->tok == tok_eof))
2632             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2633           else if (now->tok != tok_lc_collate)
2634             {
2635               lr_error (ldfile, _("\
2636 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637               lr_ignore_rest (ldfile, 0);
2638             }
2639           else
2640             lr_ignore_rest (ldfile, 1);
2641
2642           return;
2643         }
2644
2645       if (! ignore_content)
2646         {
2647           /* Get the locale definition.  */
2648           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2649                                      repertoire_name, charmap, NULL);
2650           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2651             {
2652               /* Not yet loaded.  So do it now.  */
2653               if (locfile_read (copy_locale, charmap) != 0)
2654                 goto skip_category;
2655             }
2656
2657           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2658             return;
2659         }
2660
2661       lr_ignore_rest (ldfile, 1);
2662
2663       now = lr_token (ldfile, charmap, result, NULL, verbose);
2664       nowtok = now->tok;
2665     }
2666
2667   /* Prepare the data structures.  */
2668   collate_startup (ldfile, result, copy_locale, ignore_content);
2669   collate = result->categories[LC_COLLATE].collate;
2670
2671   while (1)
2672     {
2673       char ucs4buf[10];
2674       char *symstr;
2675       size_t symlen;
2676
2677       /* Of course we don't proceed beyond the end of file.  */
2678       if (nowtok == tok_eof)
2679         break;
2680
2681       /* Ignore empty lines.  */
2682       if (nowtok == tok_eol)
2683         {
2684           now = lr_token (ldfile, charmap, result, NULL, verbose);
2685           nowtok = now->tok;
2686           continue;
2687         }
2688
2689       switch (nowtok)
2690         {
2691         case tok_copy:
2692           /* Allow copying other locales.  */
2693           now = lr_token (ldfile, charmap, result, NULL, verbose);
2694           if (now->tok != tok_string)
2695             goto err_label;
2696
2697           if (! ignore_content)
2698             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2699                          charmap, result);
2700
2701           lr_ignore_rest (ldfile, 1);
2702           break;
2703
2704         case tok_coll_weight_max:
2705           /* Ignore the rest of the line if we don't need the input of
2706              this line.  */
2707           if (ignore_content)
2708             {
2709               lr_ignore_rest (ldfile, 0);
2710               break;
2711             }
2712
2713           if (state != 0)
2714             goto err_label;
2715
2716           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2717           if (arg->tok != tok_number)
2718             goto err_label;
2719           if (collate->col_weight_max != -1)
2720             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2721                       "LC_COLLATE", "col_weight_max");
2722           else
2723             collate->col_weight_max = arg->val.num;
2724           lr_ignore_rest (ldfile, 1);
2725           break;
2726
2727         case tok_section_symbol:
2728           /* Ignore the rest of the line if we don't need the input of
2729              this line.  */
2730           if (ignore_content)
2731             {
2732               lr_ignore_rest (ldfile, 0);
2733               break;
2734             }
2735
2736           if (state != 0)
2737             goto err_label;
2738
2739           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2740           if (arg->tok != tok_bsymbol)
2741             goto err_label;
2742           else if (!ignore_content)
2743             {
2744               /* Check whether this section is already known.  */
2745               struct section_list *known = collate->sections;
2746               while (known != NULL)
2747                 {
2748                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2749                     break;
2750                   known = known->next;
2751                 }
2752
2753               if (known != NULL)
2754                 {
2755                   lr_error (ldfile,
2756                             _("%s: duplicate declaration of section `%s'"),
2757                             "LC_COLLATE", arg->val.str.startmb);
2758                   free (arg->val.str.startmb);
2759                 }
2760               else
2761                 collate->sections = make_seclist_elem (collate,
2762                                                        arg->val.str.startmb,
2763                                                        collate->sections);
2764
2765               lr_ignore_rest (ldfile, known == NULL);
2766             }
2767           else
2768             {
2769               free (arg->val.str.startmb);
2770               lr_ignore_rest (ldfile, 0);
2771             }
2772           break;
2773
2774         case tok_collating_element:
2775           /* Ignore the rest of the line if we don't need the input of
2776              this line.  */
2777           if (ignore_content)
2778             {
2779               lr_ignore_rest (ldfile, 0);
2780               break;
2781             }
2782
2783           if (state != 0 && state != 2)
2784             goto err_label;
2785
2786           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787           if (arg->tok != tok_bsymbol)
2788             goto err_label;
2789           else
2790             {
2791               const char *symbol = arg->val.str.startmb;
2792               size_t symbol_len = arg->val.str.lenmb;
2793
2794               /* Next the `from' keyword.  */
2795               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2796               if (arg->tok != tok_from)
2797                 {
2798                   free ((char *) symbol);
2799                   goto err_label;
2800                 }
2801
2802               ldfile->return_widestr = 1;
2803               ldfile->translate_strings = 1;
2804
2805               /* Finally the string with the replacement.  */
2806               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2807
2808               ldfile->return_widestr = 0;
2809               ldfile->translate_strings = 0;
2810
2811               if (arg->tok != tok_string)
2812                 goto err_label;
2813
2814               if (!ignore_content && symbol != NULL)
2815                 {
2816                   /* The name is already defined.  */
2817                   if (check_duplicate (ldfile, collate, charmap,
2818                                        repertoire, symbol, symbol_len))
2819                     goto col_elem_free;
2820
2821                   if (arg->val.str.startmb != NULL)
2822                     insert_entry (&collate->elem_table, symbol, symbol_len,
2823                                   new_element (collate,
2824                                                arg->val.str.startmb,
2825                                                arg->val.str.lenmb - 1,
2826                                                arg->val.str.startwc,
2827                                                symbol, symbol_len, 0));
2828                 }
2829               else
2830                 {
2831                 col_elem_free:
2832                   free ((char *) symbol);
2833                   free (arg->val.str.startmb);
2834                   free (arg->val.str.startwc);
2835                 }
2836               lr_ignore_rest (ldfile, 1);
2837             }
2838           break;
2839
2840         case tok_collating_symbol:
2841           /* Ignore the rest of the line if we don't need the input of
2842              this line.  */
2843           if (ignore_content)
2844             {
2845               lr_ignore_rest (ldfile, 0);
2846               break;
2847             }
2848
2849           if (state != 0 && state != 2)
2850             goto err_label;
2851
2852           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2853           if (arg->tok != tok_bsymbol)
2854             goto err_label;
2855           else
2856             {
2857               char *symbol = arg->val.str.startmb;
2858               size_t symbol_len = arg->val.str.lenmb;
2859               char *endsymbol = NULL;
2860               size_t endsymbol_len = 0;
2861               enum token_t ellipsis = tok_none;
2862
2863               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2864               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2865                 {
2866                   ellipsis = arg->tok;
2867
2868                   arg = lr_token (ldfile, charmap, result, repertoire,
2869                                   verbose);
2870                   if (arg->tok != tok_bsymbol)
2871                     {
2872                       free (symbol);
2873                       goto err_label;
2874                     }
2875
2876                   endsymbol = arg->val.str.startmb;
2877                   endsymbol_len = arg->val.str.lenmb;
2878
2879                   lr_ignore_rest (ldfile, 1);
2880                 }
2881               else if (arg->tok != tok_eol)
2882                 {
2883                   free (symbol);
2884                   goto err_label;
2885                 }
2886
2887               if (!ignore_content)
2888                 {
2889                   if (symbol == NULL
2890                       || (ellipsis != tok_none && endsymbol == NULL))
2891                     {
2892                       lr_error (ldfile, _("\
2893 %s: unknown character in collating symbol name"),
2894                                 "LC_COLLATE");
2895                       goto col_sym_free;
2896                     }
2897                   else if (ellipsis == tok_none)
2898                     {
2899                       /* A single symbol, no ellipsis.  */
2900                       if (check_duplicate (ldfile, collate, charmap,
2901                                            repertoire, symbol, symbol_len))
2902                         /* The name is already defined.  */
2903                         goto col_sym_free;
2904
2905                       insert_entry (&collate->sym_table, symbol, symbol_len,
2906                                     new_symbol (collate, symbol, symbol_len));
2907                     }
2908                   else if (symbol_len != endsymbol_len)
2909                     {
2910                     col_sym_inv_range:
2911                       lr_error (ldfile,
2912                                 _("invalid names for character range"));
2913                       goto col_sym_free;
2914                     }
2915                   else
2916                     {
2917                       /* Oh my, we have to handle an ellipsis.  First, as
2918                          usual, determine the common prefix and then
2919                          convert the rest into a range.  */
2920                       size_t prefixlen;
2921                       unsigned long int from;
2922                       unsigned long int to;
2923                       char *endp;
2924
2925                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2926                         if (symbol[prefixlen] != endsymbol[prefixlen])
2927                           break;
2928
2929                       /* Convert the rest into numbers.  */
2930                       symbol[symbol_len] = '\0';
2931                       from = strtoul (&symbol[prefixlen], &endp,
2932                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2933                       if (*endp != '\0')
2934                         goto col_sym_inv_range;
2935
2936                       endsymbol[symbol_len] = '\0';
2937                       to = strtoul (&endsymbol[prefixlen], &endp,
2938                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2939                       if (*endp != '\0')
2940                         goto col_sym_inv_range;
2941
2942                       if (from > to)
2943                         goto col_sym_inv_range;
2944
2945                       /* Now loop over all entries.  */
2946                       while (from <= to)
2947                         {
2948                           char *symbuf;
2949
2950                           symbuf = (char *) obstack_alloc (&collate->mempool,
2951                                                            symbol_len + 1);
2952
2953                           /* Create the name.  */
2954                           sprintf (symbuf,
2955                                    ellipsis == tok_ellipsis2
2956                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2957                                    (int) prefixlen, symbol,
2958                                    (int) (symbol_len - prefixlen), from);
2959
2960                           if (check_duplicate (ldfile, collate, charmap,
2961                                                repertoire, symbuf, symbol_len))
2962                             /* The name is already defined.  */
2963                             goto col_sym_free;
2964
2965                           insert_entry (&collate->sym_table, symbuf,
2966                                         symbol_len,
2967                                         new_symbol (collate, symbuf,
2968                                                     symbol_len));
2969
2970                           /* Increment the counter.  */
2971                           ++from;
2972                         }
2973
2974                       goto col_sym_free;
2975                     }
2976                 }
2977               else
2978                 {
2979                 col_sym_free:
2980                   free (symbol);
2981                   free (endsymbol);
2982                 }
2983             }
2984           break;
2985
2986         case tok_symbol_equivalence:
2987           /* Ignore the rest of the line if we don't need the input of
2988              this line.  */
2989           if (ignore_content)
2990             {
2991               lr_ignore_rest (ldfile, 0);
2992               break;
2993             }
2994
2995           if (state != 0)
2996             goto err_label;
2997
2998           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2999           if (arg->tok != tok_bsymbol)
3000             goto err_label;
3001           else
3002             {
3003               const char *newname = arg->val.str.startmb;
3004               size_t newname_len = arg->val.str.lenmb;
3005               const char *symname;
3006               size_t symname_len;
3007               void *symval;     /* Actually struct symbol_t*  */
3008
3009               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3010               if (arg->tok != tok_bsymbol)
3011                 {
3012                   free ((char *) newname);
3013                   goto err_label;
3014                 }
3015
3016               symname = arg->val.str.startmb;
3017               symname_len = arg->val.str.lenmb;
3018
3019               if (newname == NULL)
3020                 {
3021                   lr_error (ldfile, _("\
3022 %s: unknown character in equivalent definition name"),
3023                             "LC_COLLATE");
3024
3025                 sym_equiv_free:
3026                   free ((char *) newname);
3027                   free ((char *) symname);
3028                   break;
3029                 }
3030               if (symname == NULL)
3031                 {
3032                   lr_error (ldfile, _("\
3033 %s: unknown character in equivalent definition value"),
3034                             "LC_COLLATE");
3035                   goto sym_equiv_free;
3036                 }
3037
3038               /* See whether the symbol name is already defined.  */
3039               if (find_entry (&collate->sym_table, symname, symname_len,
3040                               &symval) != 0)
3041                 {
3042                   lr_error (ldfile, _("\
3043 %s: unknown symbol `%s' in equivalent definition"),
3044                             "LC_COLLATE", symname);
3045                   goto sym_equiv_free;
3046                 }
3047
3048               if (insert_entry (&collate->sym_table,
3049                                 newname, newname_len, symval) < 0)
3050                 {
3051                   lr_error (ldfile, _("\
3052 error while adding equivalent collating symbol"));
3053                   goto sym_equiv_free;
3054                 }
3055
3056               free ((char *) symname);
3057             }
3058           lr_ignore_rest (ldfile, 1);
3059           break;
3060
3061         case tok_script:
3062           /* Ignore the rest of the line if we don't need the input of
3063              this line.  */
3064           if (ignore_content)
3065             {
3066               lr_ignore_rest (ldfile, 0);
3067               break;
3068             }
3069
3070           /* We get told about the scripts we know.  */
3071           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3072           if (arg->tok != tok_bsymbol)
3073             goto err_label;
3074           else
3075             {
3076               struct section_list *runp = collate->known_sections;
3077               char *name;
3078
3079               while (runp != NULL)
3080                 if (strncmp (runp->name, arg->val.str.startmb,
3081                              arg->val.str.lenmb) == 0
3082                     && runp->name[arg->val.str.lenmb] == '\0')
3083                   break;
3084                 else
3085                   runp = runp->def_next;
3086
3087               if (runp != NULL)
3088                 {
3089                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3090                             runp->name);
3091                   lr_ignore_rest (ldfile, 0);
3092                   break;
3093                 }
3094
3095               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3096               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3097               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3098               name[arg->val.str.lenmb] = '\0';
3099               runp->name = name;
3100
3101               runp->def_next = collate->known_sections;
3102               collate->known_sections = runp;
3103             }
3104           lr_ignore_rest (ldfile, 1);
3105           break;
3106
3107         case tok_order_start:
3108           /* Ignore the rest of the line if we don't need the input of
3109              this line.  */
3110           if (ignore_content)
3111             {
3112               lr_ignore_rest (ldfile, 0);
3113               break;
3114             }
3115
3116           if (state != 0 && state != 1 && state != 2)
3117             goto err_label;
3118           state = 1;
3119
3120           /* The 14652 draft does not specify whether all `order_start' lines
3121              must contain the same number of sort-rules, but 14651 does.  So
3122              we require this here as well.  */
3123           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3124           if (arg->tok == tok_bsymbol)
3125             {
3126               /* This better should be a section name.  */
3127               struct section_list *sp = collate->known_sections;
3128               while (sp != NULL
3129                      && (sp->name == NULL
3130                          || strncmp (sp->name, arg->val.str.startmb,
3131                                      arg->val.str.lenmb) != 0
3132                          || sp->name[arg->val.str.lenmb] != '\0'))
3133                 sp = sp->def_next;
3134
3135               if (sp == NULL)
3136                 {
3137                   lr_error (ldfile, _("\
3138 %s: unknown section name `%.*s'"),
3139                             "LC_COLLATE", (int) arg->val.str.lenmb,
3140                             arg->val.str.startmb);
3141                   /* We use the error section.  */
3142                   collate->current_section = &collate->error_section;
3143
3144                   if (collate->error_section.first == NULL)
3145                     {
3146                       /* Insert &collate->error_section at the end of
3147                          the collate->sections list.  */
3148                       if (collate->sections == NULL)
3149                         collate->sections = &collate->error_section;
3150                       else
3151                         {
3152                           sp = collate->sections;
3153                           while (sp->next != NULL)
3154                             sp = sp->next;
3155
3156                           sp->next = &collate->error_section;
3157                         }
3158                       collate->error_section.next = NULL;
3159                     }
3160                 }
3161               else
3162                 {
3163                   /* One should not be allowed to open the same
3164                      section twice.  */
3165                   if (sp->first != NULL)
3166                     lr_error (ldfile, _("\
3167 %s: multiple order definitions for section `%s'"),
3168                               "LC_COLLATE", sp->name);
3169                   else
3170                     {
3171                       /* Insert sp in the collate->sections list,
3172                          right after collate->current_section.  */
3173                       if (collate->current_section != NULL)
3174                         {
3175                           sp->next = collate->current_section->next;
3176                           collate->current_section->next = sp;
3177                         }
3178                       else if (collate->sections == NULL)
3179                         /* This is the first section to be defined.  */
3180                         collate->sections = sp;
3181
3182                       collate->current_section = sp;
3183                     }
3184
3185                   /* Next should come the end of the line or a semicolon.  */
3186                   arg = lr_token (ldfile, charmap, result, repertoire,
3187                                   verbose);
3188                   if (arg->tok == tok_eol)
3189                     {
3190                       uint32_t cnt;
3191
3192                       /* This means we have exactly one rule: `forward'.  */
3193                       if (nrules > 1)
3194                         lr_error (ldfile, _("\
3195 %s: invalid number of sorting rules"),
3196                                   "LC_COLLATE");
3197                       else
3198                         nrules = 1;
3199                       sp->rules = obstack_alloc (&collate->mempool,
3200                                                  (sizeof (enum coll_sort_rule)
3201                                                   * nrules));
3202                       for (cnt = 0; cnt < nrules; ++cnt)
3203                         sp->rules[cnt] = sort_forward;
3204
3205                       /* Next line.  */
3206                       break;
3207                     }
3208
3209                   /* Get the next token.  */
3210                   arg = lr_token (ldfile, charmap, result, repertoire,
3211                                   verbose);
3212                 }
3213             }
3214           else
3215             {
3216               /* There is no section symbol.  Therefore we use the unnamed
3217                  section.  */
3218               collate->current_section = &collate->unnamed_section;
3219
3220               if (collate->unnamed_section_defined)
3221                 lr_error (ldfile, _("\
3222 %s: multiple order definitions for unnamed section"),
3223                           "LC_COLLATE");
3224               else
3225                 {
3226                   /* Insert &collate->unnamed_section at the beginning of
3227                      the collate->sections list.  */
3228                   collate->unnamed_section.next = collate->sections;
3229                   collate->sections = &collate->unnamed_section;
3230                   collate->unnamed_section_defined = true;
3231                 }
3232             }
3233
3234           /* Now read the direction names.  */
3235           read_directions (ldfile, arg, charmap, repertoire, result);
3236
3237           /* From now we need the strings untranslated.  */
3238           ldfile->translate_strings = 0;
3239           break;
3240
3241         case tok_order_end:
3242           /* Ignore the rest of the line if we don't need the input of
3243              this line.  */
3244           if (ignore_content)
3245             {
3246               lr_ignore_rest (ldfile, 0);
3247               break;
3248             }
3249
3250           if (state != 1)
3251             goto err_label;
3252
3253           /* Handle ellipsis at end of list.  */
3254           if (was_ellipsis != tok_none)
3255             {
3256               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3257                                repertoire, result);
3258               was_ellipsis = tok_none;
3259             }
3260
3261           state = 2;
3262           lr_ignore_rest (ldfile, 1);
3263           break;
3264
3265         case tok_reorder_after:
3266           /* Ignore the rest of the line if we don't need the input of
3267              this line.  */
3268           if (ignore_content)
3269             {
3270               lr_ignore_rest (ldfile, 0);
3271               break;
3272             }
3273
3274           if (state == 1)
3275             {
3276               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3277                         "LC_COLLATE");
3278               state = 2;
3279
3280               /* Handle ellipsis at end of list.  */
3281               if (was_ellipsis != tok_none)
3282                 {
3283                   handle_ellipsis (ldfile, arg->val.str.startmb,
3284                                    arg->val.str.lenmb, was_ellipsis, charmap,
3285                                    repertoire, result);
3286                   was_ellipsis = tok_none;
3287                 }
3288             }
3289           else if (state == 0 && copy_locale == NULL)
3290             goto err_label;
3291           else if (state != 0 && state != 2 && state != 3)
3292             goto err_label;
3293           state = 3;
3294
3295           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3296           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3297             {
3298               /* Find this symbol in the sequence table.  */
3299               char ucsbuf[10];
3300               char *startmb;
3301               size_t lenmb;
3302               struct element_t *insp;
3303               int no_error = 1;
3304               void *ptr;
3305
3306               if (arg->tok == tok_bsymbol)
3307                 {
3308                   startmb = arg->val.str.startmb;
3309                   lenmb = arg->val.str.lenmb;
3310                 }
3311               else
3312                 {
3313                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3314                   startmb = ucsbuf;
3315                   lenmb = 9;
3316                 }
3317
3318               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3319                 /* Yes, the symbol exists.  Simply point the cursor
3320                    to it.  */
3321                 collate->cursor = (struct element_t *) ptr;
3322               else
3323                 {
3324                   struct symbol_t *symbp;
3325                   void *ptr;
3326
3327                   if (find_entry (&collate->sym_table, startmb, lenmb,
3328                                   &ptr) == 0)
3329                     {
3330                       symbp = ptr;
3331
3332                       if (symbp->order->last != NULL
3333                           || symbp->order->next != NULL)
3334                         collate->cursor = symbp->order;
3335                       else
3336                         {
3337                           /* This is a collating symbol but its position
3338                              is not yet defined.  */
3339                           lr_error (ldfile, _("\
3340 %s: order for collating symbol %.*s not yet defined"),
3341                                     "LC_COLLATE", (int) lenmb, startmb);
3342                           collate->cursor = NULL;
3343                           no_error = 0;
3344                         }
3345                     }
3346                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3347                                        &ptr) == 0)
3348                     {
3349                       insp = (struct element_t *) ptr;
3350
3351                       if (insp->last != NULL || insp->next != NULL)
3352                         collate->cursor = insp;
3353                       else
3354                         {
3355                           /* This is a collating element but its position
3356                              is not yet defined.  */
3357                           lr_error (ldfile, _("\
3358 %s: order for collating element %.*s not yet defined"),
3359                                     "LC_COLLATE", (int) lenmb, startmb);
3360                           collate->cursor = NULL;
3361                           no_error = 0;
3362                         }
3363                     }
3364                   else
3365                     {
3366                       /* This is bad.  The symbol after which we have to
3367                          insert does not exist.  */
3368                       lr_error (ldfile, _("\
3369 %s: cannot reorder after %.*s: symbol not known"),
3370                                 "LC_COLLATE", (int) lenmb, startmb);
3371                       collate->cursor = NULL;
3372                       no_error = 0;
3373                     }
3374                 }
3375
3376               lr_ignore_rest (ldfile, no_error);
3377             }
3378           else
3379             /* This must not happen.  */
3380             goto err_label;
3381           break;
3382
3383         case tok_reorder_end:
3384           /* Ignore the rest of the line if we don't need the input of
3385              this line.  */
3386           if (ignore_content)
3387             break;
3388
3389           if (state != 3)
3390             goto err_label;
3391           state = 4;
3392           lr_ignore_rest (ldfile, 1);
3393           break;
3394
3395         case tok_reorder_sections_after:
3396           /* Ignore the rest of the line if we don't need the input of
3397              this line.  */
3398           if (ignore_content)
3399             {
3400               lr_ignore_rest (ldfile, 0);
3401               break;
3402             }
3403
3404           if (state == 1)
3405             {
3406               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3407                         "LC_COLLATE");
3408               state = 2;
3409
3410               /* Handle ellipsis at end of list.  */
3411               if (was_ellipsis != tok_none)
3412                 {
3413                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3414                                    repertoire, result);
3415                   was_ellipsis = tok_none;
3416                 }
3417             }
3418           else if (state == 3)
3419             {
3420               WITH_CUR_LOCALE (error (0, 0, _("\
3421 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3422               state = 4;
3423             }
3424           else if (state != 2 && state != 4)
3425             goto err_label;
3426           state = 5;
3427
3428           /* Get the name of the sections we are adding after.  */
3429           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3430           if (arg->tok == tok_bsymbol)
3431             {
3432               /* Now find a section with this name.  */
3433               struct section_list *runp = collate->sections;
3434
3435               while (runp != NULL)
3436                 {
3437                   if (runp->name != NULL
3438                       && strlen (runp->name) == arg->val.str.lenmb
3439                       && memcmp (runp->name, arg->val.str.startmb,
3440                                  arg->val.str.lenmb) == 0)
3441                     break;
3442
3443                   runp = runp->next;
3444                 }
3445
3446               if (runp != NULL)
3447                 collate->current_section = runp;
3448               else
3449                 {
3450                   /* This is bad.  The section after which we have to
3451                      reorder does not exist.  Therefore we cannot
3452                      process the whole rest of this reorder
3453                      specification.  */
3454                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3455                             "LC_COLLATE", (int) arg->val.str.lenmb,
3456                             arg->val.str.startmb);
3457
3458                   do
3459                     {
3460                       lr_ignore_rest (ldfile, 0);
3461
3462                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3463                     }
3464                   while (now->tok == tok_reorder_sections_after
3465                          || now->tok == tok_reorder_sections_end
3466                          || now->tok == tok_end);
3467
3468                   /* Process the token we just saw.  */
3469                   nowtok = now->tok;
3470                   continue;
3471                 }
3472             }
3473           else
3474             /* This must not happen.  */
3475             goto err_label;
3476           break;
3477
3478         case tok_reorder_sections_end:
3479           /* Ignore the rest of the line if we don't need the input of
3480              this line.  */
3481           if (ignore_content)
3482             break;
3483
3484           if (state != 5)
3485             goto err_label;
3486           state = 6;
3487           lr_ignore_rest (ldfile, 1);
3488           break;
3489
3490         case tok_bsymbol:
3491         case tok_ucs4:
3492           /* Ignore the rest of the line if we don't need the input of
3493              this line.  */
3494           if (ignore_content)
3495             {
3496               lr_ignore_rest (ldfile, 0);
3497               break;
3498             }
3499
3500           if (state != 0 && state != 1 && state != 3 && state != 5)
3501             goto err_label;
3502
3503           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3504             goto err_label;
3505
3506           if (nowtok == tok_ucs4)
3507             {
3508               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3509               symstr = ucs4buf;
3510               symlen = 9;
3511             }
3512           else if (arg != NULL)
3513             {
3514               symstr = arg->val.str.startmb;
3515               symlen = arg->val.str.lenmb;
3516             }
3517           else
3518             {
3519               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3520                         (int) ldfile->token.val.str.lenmb,
3521                         ldfile->token.val.str.startmb);
3522               break;
3523             }
3524
3525           struct element_t *seqp;
3526           if (state == 0)
3527             {
3528               /* We are outside an `order_start' region.  This means
3529                  we must only accept definitions of values for
3530                  collation symbols since these are purely abstract
3531                  values and don't need directions associated.  */
3532               void *ptr;
3533
3534               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3535                 {
3536                   seqp = ptr;
3537
3538                   /* It's already defined.  First check whether this
3539                      is really a collating symbol.  */
3540                   if (seqp->is_character)
3541                     goto err_label;
3542
3543                   goto move_entry;
3544                 }
3545               else
3546                 {
3547                   void *result;
3548
3549                   if (find_entry (&collate->sym_table, symstr, symlen,
3550                                   &result) != 0)
3551                     /* No collating symbol, it's an error.  */
3552                     goto err_label;
3553
3554                   /* Maybe this is the first time we define a symbol
3555                      value and it is before the first actual section.  */
3556                   if (collate->sections == NULL)
3557                     collate->sections = collate->current_section =
3558                       &collate->symbol_section;
3559                 }
3560
3561               if (was_ellipsis != tok_none)
3562                 {
3563                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3564                                    charmap, repertoire, result);
3565
3566                   /* Remember that we processed the ellipsis.  */
3567                   was_ellipsis = tok_none;
3568
3569                   /* And don't add the value a second time.  */
3570                   break;
3571                 }
3572             }
3573           else if (state == 3)
3574             {
3575               /* It is possible that we already have this collation sequence.
3576                  In this case we move the entry.  */
3577               void *sym;
3578               void *ptr;
3579
3580               /* If the symbol after which we have to insert was not found
3581                  ignore all entries.  */
3582               if (collate->cursor == NULL)
3583                 {
3584                   lr_ignore_rest (ldfile, 0);
3585                   break;
3586                 }
3587
3588               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3589                 {
3590                   seqp = (struct element_t *) ptr;
3591                   goto move_entry;
3592                 }
3593
3594               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3595                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3596                 goto move_entry;
3597
3598               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3599                   && (seqp = (struct element_t *) ptr,
3600                       seqp->last != NULL || seqp->next != NULL
3601                       || (collate->start != NULL && seqp == collate->start)))
3602                 {
3603                 move_entry:
3604                   /* Remove the entry from the old position.  */
3605                   if (seqp->last == NULL)
3606                     collate->start = seqp->next;
3607                   else
3608                     seqp->last->next = seqp->next;
3609                   if (seqp->next != NULL)
3610                     seqp->next->last = seqp->last;
3611
3612                   /* We also have to check whether this entry is the
3613                      first or last of a section.  */
3614                   if (seqp->section->first == seqp)
3615                     {
3616                       if (seqp->section->first == seqp->section->last)
3617                         /* This section has no content anymore.  */
3618                         seqp->section->first = seqp->section->last = NULL;
3619                       else
3620                         seqp->section->first = seqp->next;
3621                     }
3622                   else if (seqp->section->last == seqp)
3623                     seqp->section->last = seqp->last;
3624
3625                   /* Now insert it in the new place.  */
3626                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3627                                   tok_none);
3628                   break;
3629                 }
3630
3631               /* Otherwise we just add a new entry.  */
3632             }
3633           else if (state == 5)
3634             {
3635               /* We are reordering sections.  Find the named section.  */
3636               struct section_list *runp = collate->sections;
3637               struct section_list *prevp = NULL;
3638
3639               while (runp != NULL)
3640                 {
3641                   if (runp->name != NULL
3642                       && strlen (runp->name) == symlen
3643                       && memcmp (runp->name, symstr, symlen) == 0)
3644                     break;
3645
3646                   prevp = runp;
3647                   runp = runp->next;
3648                 }
3649
3650               if (runp == NULL)
3651                 {
3652                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3653                             "LC_COLLATE", (int) symlen, symstr);
3654                   lr_ignore_rest (ldfile, 0);
3655                 }
3656               else
3657                 {
3658                   if (runp != collate->current_section)
3659                     {
3660                       /* Remove the named section from the old place and
3661                          insert it in the new one.  */
3662                       prevp->next = runp->next;
3663
3664                       runp->next = collate->current_section->next;
3665                       collate->current_section->next = runp;
3666                       collate->current_section = runp;
3667                     }
3668
3669                   /* Process the rest of the line which might change
3670                      the collation rules.  */
3671                   arg = lr_token (ldfile, charmap, result, repertoire,
3672                                   verbose);
3673                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3674                     read_directions (ldfile, arg, charmap, repertoire,
3675                                      result);
3676                 }
3677               break;
3678             }
3679           else if (was_ellipsis != tok_none)
3680             {
3681               /* Using the information in the `ellipsis_weight'
3682                  element and this and the last value we have to handle
3683                  the ellipsis now.  */
3684               assert (state == 1);
3685
3686               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3687                                repertoire, result);
3688
3689               /* Remember that we processed the ellipsis.  */
3690               was_ellipsis = tok_none;
3691
3692               /* And don't add the value a second time.  */
3693               break;
3694             }
3695
3696           /* Now insert in the new place.  */
3697           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3698           break;
3699
3700         case tok_undefined:
3701           /* Ignore the rest of the line if we don't need the input of
3702              this line.  */
3703           if (ignore_content)
3704             {
3705               lr_ignore_rest (ldfile, 0);
3706               break;
3707             }
3708
3709           if (state != 1)
3710             goto err_label;
3711
3712           if (was_ellipsis != tok_none)
3713             {
3714               lr_error (ldfile,
3715                         _("%s: cannot have `%s' as end of ellipsis range"),
3716                         "LC_COLLATE", "UNDEFINED");
3717
3718               unlink_element (collate);
3719               was_ellipsis = tok_none;
3720             }
3721
3722           /* See whether UNDEFINED already appeared somewhere.  */
3723           if (collate->undefined.next != NULL
3724               || &collate->undefined == collate->cursor)
3725             {
3726               lr_error (ldfile,
3727                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3728                         "LC_COLLATE", 9, "UNDEFINED",
3729                         collate->undefined.file,
3730                         collate->undefined.line);
3731               lr_ignore_rest (ldfile, 0);
3732             }
3733           else
3734             /* Parse the weights.  */
3735              insert_weights (ldfile, &collate->undefined, charmap,
3736                              repertoire, result, tok_none);
3737           break;
3738
3739         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3740         case tok_ellipsis3: /* absolute ellipsis */
3741         case tok_ellipsis4: /* symbolic decimal ellipsis */
3742           /* This is the symbolic (decimal or hexadecimal) or absolute
3743              ellipsis.  */
3744           if (was_ellipsis != tok_none)
3745             goto err_label;
3746
3747           if (state != 0 && state != 1 && state != 3)
3748             goto err_label;
3749
3750           was_ellipsis = nowtok;
3751
3752           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3753                           repertoire, result, nowtok);
3754           break;
3755
3756         case tok_end:
3757         seen_end:
3758           /* Next we assume `LC_COLLATE'.  */
3759           if (!ignore_content)
3760             {
3761               if (state == 0 && copy_locale == NULL)
3762                 /* We must either see a copy statement or have
3763                    ordering values.  */
3764                 lr_error (ldfile,
3765                           _("%s: empty category description not allowed"),
3766                           "LC_COLLATE");
3767               else if (state == 1)
3768                 {
3769                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3770                             "LC_COLLATE");
3771
3772                   /* Handle ellipsis at end of list.  */
3773                   if (was_ellipsis != tok_none)
3774                     {
3775                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3776                                        repertoire, result);
3777                       was_ellipsis = tok_none;
3778                     }
3779                 }
3780               else if (state == 3)
3781                 WITH_CUR_LOCALE (error (0, 0, _("\
3782 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3783               else if (state == 5)
3784                 WITH_CUR_LOCALE (error (0, 0, _("\
3785 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3786             }
3787           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3788           if (arg->tok == tok_eof)
3789             break;
3790           if (arg->tok == tok_eol)
3791             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3792           else if (arg->tok != tok_lc_collate)
3793             lr_error (ldfile, _("\
3794 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3795           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3796           return;
3797
3798         case tok_define:
3799           if (ignore_content)
3800             {
3801               lr_ignore_rest (ldfile, 0);
3802               break;
3803             }
3804
3805           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3806           if (arg->tok != tok_ident)
3807             goto err_label;
3808
3809           /* Simply add the new symbol.  */
3810           struct name_list *newsym = xmalloc (sizeof (*newsym)
3811                                               + arg->val.str.lenmb + 1);
3812           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3813           newsym->str[arg->val.str.lenmb] = '\0';
3814           newsym->next = defined;
3815           defined = newsym;
3816
3817           lr_ignore_rest (ldfile, 1);
3818           break;
3819
3820         case tok_undef:
3821           if (ignore_content)
3822             {
3823               lr_ignore_rest (ldfile, 0);
3824               break;
3825             }
3826
3827           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3828           if (arg->tok != tok_ident)
3829             goto err_label;
3830
3831           /* Remove _all_ occurrences of the symbol from the list.  */
3832           struct name_list *prevdef = NULL;
3833           struct name_list *curdef = defined;
3834           while (curdef != NULL)
3835             if (strncmp (arg->val.str.startmb, curdef->str,
3836                          arg->val.str.lenmb) == 0
3837                 && curdef->str[arg->val.str.lenmb] == '\0')
3838               {
3839                 if (prevdef == NULL)
3840                   defined = curdef->next;
3841                 else
3842                   prevdef->next = curdef->next;
3843
3844                 struct name_list *olddef = curdef;
3845                 curdef = curdef->next;
3846
3847                 free (olddef);
3848               }
3849             else
3850               {
3851                 prevdef = curdef;
3852                 curdef = curdef->next;
3853               }
3854
3855           lr_ignore_rest (ldfile, 1);
3856           break;
3857
3858         case tok_ifdef:
3859         case tok_ifndef:
3860           if (ignore_content)
3861             {
3862               lr_ignore_rest (ldfile, 0);
3863               break;
3864             }
3865
3866         found_ifdef:
3867           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3868           if (arg->tok != tok_ident)
3869             goto err_label;
3870           lr_ignore_rest (ldfile, 1);
3871
3872           if (collate->else_action == else_none)
3873             {
3874               curdef = defined;
3875               while (curdef != NULL)
3876                 if (strncmp (arg->val.str.startmb, curdef->str,
3877                              arg->val.str.lenmb) == 0
3878                     && curdef->str[arg->val.str.lenmb] == '\0')
3879                   break;
3880                 else
3881                   curdef = curdef->next;
3882
3883               if ((nowtok == tok_ifdef && curdef != NULL)
3884                   || (nowtok == tok_ifndef && curdef == NULL))
3885                 {
3886                   /* We have to use the if-branch.  */
3887                   collate->else_action = else_ignore;
3888                 }
3889               else
3890                 {
3891                   /* We have to use the else-branch, if there is one.  */
3892                   nowtok = skip_to (ldfile, collate, charmap, 0);
3893                   if (nowtok == tok_else)
3894                     collate->else_action = else_seen;
3895                   else if (nowtok == tok_elifdef)
3896                     {
3897                       nowtok = tok_ifdef;
3898                       goto found_ifdef;
3899                     }
3900                   else if (nowtok == tok_elifndef)
3901                     {
3902                       nowtok = tok_ifndef;
3903                       goto found_ifdef;
3904                     }
3905                   else if (nowtok == tok_eof)
3906                     goto seen_eof;
3907                   else if (nowtok == tok_end)
3908                     goto seen_end;
3909                 }
3910             }
3911           else
3912             {
3913               /* XXX Should it really become necessary to support nested
3914                  preprocessor handling we will push the state here.  */
3915               lr_error (ldfile, _("%s: nested conditionals not supported"),
3916                         "LC_COLLATE");
3917               nowtok = skip_to (ldfile, collate, charmap, 1);
3918               if (nowtok == tok_eof)
3919                 goto seen_eof;
3920               else if (nowtok == tok_end)
3921                 goto seen_end;
3922             }
3923           break;
3924
3925         case tok_elifdef:
3926         case tok_elifndef:
3927         case tok_else:
3928           if (ignore_content)
3929             {
3930               lr_ignore_rest (ldfile, 0);
3931               break;
3932             }
3933
3934           lr_ignore_rest (ldfile, 1);
3935
3936           if (collate->else_action == else_ignore)
3937             {
3938               /* Ignore everything until the endif.  */
3939               nowtok = skip_to (ldfile, collate, charmap, 1);
3940               if (nowtok == tok_eof)
3941                 goto seen_eof;
3942               else if (nowtok == tok_end)
3943                 goto seen_end;
3944             }
3945           else
3946             {
3947               assert (collate->else_action == else_none);
3948               lr_error (ldfile, _("\
3949 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3950                         nowtok == tok_else ? "else"
3951                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3952             }
3953           break;
3954
3955         case tok_endif:
3956           if (ignore_content)
3957             {
3958               lr_ignore_rest (ldfile, 0);
3959               break;
3960             }
3961
3962           lr_ignore_rest (ldfile, 1);
3963
3964           if (collate->else_action != else_ignore
3965               && collate->else_action != else_seen)
3966             lr_error (ldfile, _("\
3967 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3968
3969           /* XXX If we support nested preprocessor directives we pop
3970              the state here.  */
3971           collate->else_action = else_none;
3972           break;
3973
3974         default:
3975         err_label:
3976           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3977         }
3978
3979       /* Prepare for the next round.  */
3980       now = lr_token (ldfile, charmap, result, NULL, verbose);
3981       nowtok = now->tok;
3982     }
3983
3984  seen_eof:
3985   /* When we come here we reached the end of the file.  */
3986   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3987 }