locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2023 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published
   6    by the Free Software Foundation; version 2 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #ifdef HAVE_CONFIG_H
  18 # include <config.h>
  19 #endif
  20
  21 #include <errno.h>
  22 #include <stdlib.h>
  23 #include <wchar.h>
  24 #include <stdint.h>
  25 #include <sys/param.h>
  26 #include <array_length.h>
  27
  28 #include "localedef.h"
  29 #include "charmap.h"
  30 #include "localeinfo.h"
  31 #include "linereader.h"
  32 #include "locfile.h"
  33 #include "elem-hash.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* #define NDEBUG 1 */
  37 #include <assert.h>
  38
  39 #define obstack_chunk_alloc malloc
  40 #define obstack_chunk_free free
  41
  42 static inline void
  43 __attribute ((always_inline))
  44 obstack_int32_grow (struct obstack *obstack, int32_t data)
  45 {
  46   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  47   data = maybe_swap_uint32 (data);
  48   if (sizeof (int32_t) == sizeof (int))
  49     obstack_int_grow (obstack, data);
  50   else
  51     obstack_grow (obstack, &data, sizeof (int32_t));
  52 }
  53
  54 static inline void
  55 __attribute ((always_inline))
  56 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  57 {
  58   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  59   data = maybe_swap_uint32 (data);
  60   if (sizeof (int32_t) == sizeof (int))
  61     obstack_int_grow_fast (obstack, data);
  62   else
  63     obstack_grow (obstack, &data, sizeof (int32_t));
  64 }
  65
  66 /* Forward declaration; the full definition follows below.  */
  67 struct element_t;
  68
  69 /* Data type for one section of the collation definition.  A node can be
          chained into two lists (through DEF_NEXT and NEXT) and records the
          first and last element that belong to the section.  */
  70 struct section_list
  71 {
  72   /* Successor in the known_sections list.  */
  73   struct section_list *def_next;
  74   /* Successor in the sections list.  */
  75   struct section_list *next;
  76   /* Name of the section.  */
  77   const char *name;
  78   /* First element of this section.  */
  79   struct element_t *first;
  80   /* Last element of this section.  */
  81   struct element_t *last;
  82   /* These are the rules for this section (stored by read_directions).  */
  83   enum coll_sort_rule *rules;
  84   /* Index of the rule set in the appropriate section of the output file.  */
  85   int ruleidx;
  86 };
  87
  88 struct element_t;  /* Forward declaration (repeated).  */
  89
  90 struct element_list_t  /* Counted array of element pointers: one weight.  */
  91 {
  92   /* Number of entries in W.  */
  93   int cnt;
  94
  95   struct element_t **w;  /* The entries; insert_weights stores a null
                                  pointer here for an ignored level.  */
  96 };
  97
  98 /* Data type for collating element.  Objects are created by new_element
          and linked into the order list by insert_weights.  */
  99 struct element_t
 100 {
 101   const char *name;  /* Symbolic name, or NULL if none was given.  */
 102
 103   const char *mbs;  /* Multibyte sequence, or NULL.  */
 104   size_t nmbs;  /* Length of MBS in bytes; 0 if MBS is NULL.  */
 105   const uint32_t *wcs;  /* Zero-terminated wide char sequence, or NULL.  */
 106   size_t nwcs;  /* Number of wide chars in WCS; 0 if WCS is NULL.  */
 107   int *mborder;
 108   int wcorder;
 109
 110   /* The following is a bit mask whose bits are set if this element is
 111      used in the corresponding level.  Interesting for the singlebyte
 112      weight computation.
 113
 114      XXX The type here restricts the number of levels to 32.  It could
 115      be changed if necessary but I doubt this is necessary.  */
 116   unsigned int used_in_level;
 117
 118   struct element_list_t *weights;  /* NULL until insert_weights has
                                            allocated one list per rule.  */
 119
 120   /* Nonzero if this is a real character definition.  */
 121   int is_character;
 122
 123   /* Order of the character in the sequence.  This information will
 124      be used in range expressions.  */
 125   int mbseqorder;
 126   int wcseqorder;
 127
 128   /* Where does the definition come from.  */
 129   const char *file;
 130   size_t line;
 131
 132   /* Which section does this belong to.  */
 133   struct section_list *section;
 134
 135   /* Predecessor and successor in the order list.  */
 136   struct element_t *last;
 137   struct element_t *next;
 138
 139   /* Links for the multibyte output list.  */
 140   struct element_t *mbnext;
 141   struct element_t *mblast;
 142
 143   /* Links for the wide character output list.  */
 144   struct element_t *wcnext;
 145   struct element_t *wclast;
 146 };
 147
 148 /* Special element values.  These are small pointer constants, not
          addresses of real elements.  insert_weights stores ELEMENT_ELLIPSIS2
          as the placeholder for a weight that depends on the element iterating
          over a range.  */
 149 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 150 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 151 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 152
 153 /* Data type for collating symbol.  Objects are created by new_symbol.  */
 154 struct symbol_t
 155 {
 156   const char *name;  /* Name of the symbol (a copy made by new_symbol).  */
 157
 158   /* Points to the symbol's place in the order list.  NULL until an
          element has been created for the symbol (see find_element).  */
 159   struct element_t *order;
 160
 161   /* Where does the definition come from.  */
 162   const char *file;
 163   size_t line;
 164 };
 165
 166 /* Sparse table of struct element_t *.  The macros below are the
          parameters for this inclusion of 3level.h.  NOTE(review): TABLE,
          ELEMENT and DEFAULT are defined again before the later inclusions
          without an #undef in this file, so the header presumably undefines
          them itself -- confirm.  */
 167 #define TABLE wchead_table
 168 #define ELEMENT struct element_t *
 169 #define DEFAULT NULL
 170 #define ITERATE
 171 #define NO_ADD_LOCALE
 172 #include "3level.h"
 173
 174 /* Sparse table of int32_t; entries default to 0.  */
 175 #define TABLE collidx_table
 176 #define ELEMENT int32_t
 177 #define DEFAULT 0
 178 #include "3level.h"
 179
 180 /* Sparse table of uint32_t; entries default to all bits set.  */
 181 #define TABLE collseq_table
 182 #define ELEMENT uint32_t
 183 #define DEFAULT ~((uint32_t) 0)
 184 #include "3level.h"
 185
 186
 187 /* Simple name list for the preprocessor.  */
 188 struct name_list
 189 {
 190   struct name_list *next;
 191   char str[0];
 192 };
 193
 194
 195 /* The real definition of the struct for the LC_COLLATE locale.  */
 196 struct locale_collate_t
 197 {
 198   /* Does the locale use code points to compare the encoding?  */
 199   bool codepoint_collation;
 200
 201   int col_weight_max;
 202   int cur_weight_max;
 203
 204   /* List of known sections (scripts).  */
 205   struct section_list *known_sections;
 206   /* List of used sections.  */
 207   struct section_list *sections;
 208   /* Section which currently receives new definitions.  */
 209   struct section_list *current_section;
 210   /* There always can be an unnamed section.  */
 211   struct section_list unnamed_section;
 212   /* Flag whether the unnamed section has been defined.  */
 213   bool unnamed_section_defined;
 214   /* To make handling of errors easier we have another section.  */
 215   struct section_list error_section;
 216   /* Sometimes we are defining the values for collating symbols before
 217      the first actual section.  */
 218   struct section_list symbol_section;
 219
 220   /* Start of the order list.  */
 221   struct element_t *start;
 222
 223   /* The undefined element.  */
 224   struct element_t undefined;
 225
 226   /* This is the cursor for `reorder_after' insertions.  insert_weights
          links every new element in right behind it and then moves the cursor
          to the new element.  */
 227   struct element_t *cursor;
 228
 229   /* This value is used when handling ellipsis.  */
 230   struct element_t ellipsis_weight;
 231
 232   /* Known collating elements.  */
 233   hash_table elem_table;
 234
 235   /* Known collating symbols.  */
 236   hash_table sym_table;
 237
 238   /* Known collation sequences.  */
 239   hash_table seq_table;
 240
 241   struct obstack mempool;  /* Elements, symbols and their strings are
                                    allocated from this pool.  */
 242
 243   /* The LC_COLLATE category is a bit special as it is sometimes possible
 244      that the definitions from more than one input file contain information.
 245      Therefore we keep all relevant input in a list.  */
 246   struct locale_collate_t *next;
 247
 248   /* Array with the heads of the lists for each of the leading bytes in
 249      the multibyte sequences.  */
 250   struct element_t *mbheads[256];
 251
 252   /* Sparse table with the heads of the lists for the wide character
 253      sequences.  */
 254   struct wchead_table wcheads;
 255
 256   /* The arrays with the collation sequence order.  */
 257   unsigned char mbseqorder[256];
 258   struct collseq_table wcseqorder;
 259
 260   /* State of the preprocessor.  */
 261   enum
 262     {
 263       else_none = 0,
 264       else_ignore,
 265       else_seen
 266     }
 267     else_action;
 268 };
 269
 270
 271 /* We have a few global variables which are used for reading all
 272    LC_COLLATE category descriptions in all files.  */
 273 static uint32_t nrules;  /* Number of sorting rules.  Zero until
                                  read_directions has processed the first
                                  direction list.  */
 274
 275 /* List of defined preprocessor symbols.  */
 276 static struct name_list *defined;
 277
 278
 279 /* We need UTF-8 encoding of numbers.  */
 280 static inline int
 281 __attribute ((always_inline))
 282 utf8_encode (char *buf, int val)
 283 {
 284   int retval;
 285
 286   if (val < 0x80)
 287     {
 288       *buf++ = (char) val;
 289       retval = 1;
 290     }
 291   else
 292     {
 293       int step;
 294
 295       for (step = 2; step < 6; ++step)
 296         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 297           break;
 298       retval = step;
 299
 300       *buf = (unsigned char) (~0xff >> step);
 301       --step;
 302       do
 303         {
 304           buf[step] = 0x80 | (val & 0x3f);
 305           val >>= 6;
 306         }
 307       while (--step > 0);
 308       *buf |= val;
 309     }
 310
 311   return retval;
 312 }
 313
 314
 315 static struct section_list *
 316 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 317                    struct section_list *next)
 318 {
 319   struct section_list *newp;
 320
 321   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 322                                                 sizeof (*newp));
 323   newp->next = next;
 324   newp->name = string;
 325   newp->first = NULL;
 326   newp->last = NULL;
 327
 328   return newp;
 329 }
 330
 331
 332 static struct element_t *
 333 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 334              const uint32_t *wcs, const char *name, size_t namelen,
 335              int is_character)
 336 {
 337   struct element_t *newp;
 338
 339   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 340                                              sizeof (*newp));
 341   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 342                                                     name, namelen);
 343   if (mbs != NULL)
 344     {
 345       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 346       newp->nmbs = mbslen;
 347     }
 348   else
 349     {
 350       newp->mbs = NULL;
 351       newp->nmbs = 0;
 352     }
 353   if (wcs != NULL)
 354     {
 355       size_t nwcs = wcslen ((wchar_t *) wcs);
 356       uint32_t zero = 0;
 357       /* Handle <U0000> as a single character.  */
 358       if (nwcs == 0)
 359         nwcs = 1;
 360       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 361       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 362       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 363       newp->nwcs = nwcs;
 364     }
 365   else
 366     {
 367       newp->wcs = NULL;
 368       newp->nwcs = 0;
 369     }
 370   newp->mborder = NULL;
 371   newp->wcorder = 0;
 372   newp->used_in_level = 0;
 373   newp->is_character = is_character;
 374
 375   /* Will be assigned later.  XXX  */
 376   newp->mbseqorder = 0;
 377   newp->wcseqorder = 0;
 378
 379   /* Will be allocated later.  */
 380   newp->weights = NULL;
 381
 382   newp->file = NULL;
 383   newp->line = 0;
 384
 385   newp->section = collate->current_section;
 386
 387   newp->last = NULL;
 388   newp->next = NULL;
 389
 390   newp->mbnext = NULL;
 391   newp->mblast = NULL;
 392
 393   newp->wcnext = NULL;
 394   newp->wclast = NULL;
 395
 396   return newp;
 397 }
 398
 399
 400 static struct symbol_t *
 401 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 402 {
 403   struct symbol_t *newp;
 404
 405   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 406
 407   newp->name = obstack_copy0 (&collate->mempool, name, len);
 408   newp->order = NULL;
 409
 410   newp->file = NULL;
 411   newp->line = 0;
 412
 413   return newp;
 414 }
 415
 416
 417 /* Test whether this name is already defined somewhere.  */
 418 static int
 419 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 420                  const struct charmap_t *charmap,
 421                  struct repertoire_t *repertoire, const char *symbol,
 422                  size_t symbol_len)
 423 {
 424   void *ignore = NULL;
 425
 426   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 427     {
 428       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 429                 (int) symbol_len, symbol);
 430       return 1;
 431     }
 432
 433   if (repertoire != NULL
 434       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 435           == 0))
 436     {
 437       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 438                 (int) symbol_len, symbol);
 439       return 1;
 440     }
 441
 442   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 443     {
 444       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 445                 (int) symbol_len, symbol);
 446       return 1;
 447     }
 448
 449   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 450     {
 451       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 452                 (int) symbol_len, symbol);
 453       return 1;
 454     }
 455
 456   return 0;
 457 }
 458
 459
 460 /* Read the direction specification.  */
 461 static void
 462 read_directions (struct linereader *ldfile, struct token *arg,
 463                  const struct charmap_t *charmap,
 464                  struct repertoire_t *repertoire, struct localedef_t *result)
 465 {
 466   int cnt = 0;
 467   int max = nrules ?: 10;
 468   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 469   int warned = 0;
 470   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 471
 472   while (1)
 473     {
 474       int valid = 0;
 475
 476       if (arg->tok == tok_forward)
 477         {
 478           if (rules[cnt] & sort_backward)
 479             {
 480               if (! warned)
 481                 {
 482                   lr_error (ldfile, _("\
 483 %s: `forward' and `backward' are mutually excluding each other"),
 484                             "LC_COLLATE");
 485                   warned = 1;
 486                 }
 487             }
 488           else if (rules[cnt] & sort_forward)
 489             {
 490               if (! warned)
 491                 {
 492                   lr_error (ldfile, _("\
 493 %s: `%s' mentioned more than once in definition of weight %d"),
 494                             "LC_COLLATE", "forward", cnt + 1);
 495                 }
 496             }
 497           else
 498             rules[cnt] |= sort_forward;
 499
 500           valid = 1;
 501         }
 502       else if (arg->tok == tok_backward)
 503         {
 504           if (rules[cnt] & sort_forward)
 505             {
 506               if (! warned)
 507                 {
 508                   lr_error (ldfile, _("\
 509 %s: `forward' and `backward' are mutually excluding each other"),
 510                             "LC_COLLATE");
 511                   warned = 1;
 512                 }
 513             }
 514           else if (rules[cnt] & sort_backward)
 515             {
 516               if (! warned)
 517                 {
 518                   lr_error (ldfile, _("\
 519 %s: `%s' mentioned more than once in definition of weight %d"),
 520                             "LC_COLLATE", "backward", cnt + 1);
 521                 }
 522             }
 523           else
 524             rules[cnt] |= sort_backward;
 525
 526           valid = 1;
 527         }
 528       else if (arg->tok == tok_position)
 529         {
 530           if (rules[cnt] & sort_position)
 531             {
 532               if (! warned)
 533                 {
 534                   lr_error (ldfile, _("\
 535 %s: `%s' mentioned more than once in definition of weight %d"),
 536                             "LC_COLLATE", "position", cnt + 1);
 537                 }
 538             }
 539           else
 540             rules[cnt] |= sort_position;
 541
 542           valid = 1;
 543         }
 544
 545       if (valid)
 546         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 547
 548       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 549           || arg->tok == tok_semicolon)
 550         {
 551           if (! valid && ! warned)
 552             {
 553               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 554               warned = 1;
 555             }
 556
 557           /* See whether we have to increment the counter.  */
 558           if (arg->tok != tok_comma && rules[cnt] != 0)
 559             {
 560               /* Add the default `forward' if we have seen only `position'.  */
 561               if (rules[cnt] == sort_position)
 562                 rules[cnt] = sort_position | sort_forward;
 563
 564               ++cnt;
 565             }
 566
 567           if (arg->tok == tok_eof || arg->tok == tok_eol)
 568             /* End of line or file, so we exit the loop.  */
 569             break;
 570
 571           if (nrules == 0)
 572             {
 573               /* See whether we have enough room in the array.  */
 574               if (cnt == max)
 575                 {
 576                   max += 10;
 577                   rules = (enum coll_sort_rule *) xrealloc (rules,
 578                                                             max
 579                                                             * sizeof (*rules));
 580                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 581                 }
 582             }
 583           else
 584             {
 585               if (cnt == nrules)
 586                 {
 587                   /* There must not be any more rule.  */
 588                   if (! warned)
 589                     {
 590                       lr_error (ldfile, _("\
 591 %s: too many rules; first entry only had %d"),
 592                                 "LC_COLLATE", nrules);
 593                       warned = 1;
 594                     }
 595
 596                   lr_ignore_rest (ldfile, 0);
 597                   break;
 598                 }
 599             }
 600         }
 601       else
 602         {
 603           if (! warned)
 604             {
 605               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 606               warned = 1;
 607             }
 608         }
 609
 610       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 611     }
 612
 613   if (nrules == 0)
 614     {
 615       /* Now we know how many rules we have.  */
 616       nrules = cnt;
 617       rules = (enum coll_sort_rule *) xrealloc (rules,
 618                                                 nrules * sizeof (*rules));
 619     }
 620   else
 621     {
 622       if (cnt < nrules)
 623         {
 624           /* Not enough rules in this specification.  */
 625           if (! warned)
 626             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 627
 628           do
 629             rules[cnt] = sort_forward;
 630           while (++cnt < nrules);
 631         }
 632     }
 633
 634   collate->current_section->rules = rules;
 635 }
 636
 637
 638 static struct element_t *
 639 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 640               const char *str, size_t len)
 641 {
 642   void *result = NULL;
 643
 644   /* Search for the entries among the collation sequences already define.  */
 645   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 646     {
 647       /* Nope, not define yet.  So we see whether it is a
 648          collation symbol.  */
 649       void *ptr;
 650
 651       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 652         {
 653           /* It's a collation symbol.  */
 654           struct symbol_t *sym = (struct symbol_t *) ptr;
 655           result = sym->order;
 656
 657           if (result == NULL)
 658             result = sym->order = new_element (collate, NULL, 0, NULL,
 659                                                NULL, 0, 0);
 660         }
 661       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 662         {
 663           /* It's also no collation element.  So it is a character
 664              element defined later.  */
 665           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 666           /* Insert it into the sequence table.  */
 667           insert_entry (&collate->seq_table, str, len, result);
 668         }
 669     }
 670
 671   return (struct element_t *) result;
 672 }
 673
 674
 675 static void
 676 unlink_element (struct locale_collate_t *collate)
 677 {
 678   if (collate->cursor == collate->start)
 679     {
 680       assert (collate->cursor->next == NULL);
 681       assert (collate->cursor->last == NULL);
 682       collate->cursor = NULL;
 683     }
 684   else
 685     {
 686       if (collate->cursor->next != NULL)
 687         collate->cursor->next->last = collate->cursor->last;
 688       if (collate->cursor->last != NULL)
 689         collate->cursor->last->next = collate->cursor->next;
 690       collate->cursor = collate->cursor->last;
 691     }
 692 }
 693
 694
 695 static void
 696 insert_weights (struct linereader *ldfile, struct element_t *elem,
 697                 const struct charmap_t *charmap,
 698                 struct repertoire_t *repertoire, struct localedef_t *result,
 699                 enum token_t ellipsis)
 700 {
 701   int weight_cnt;
 702   struct token *arg;
 703   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 704
 705   /* Initialize all the fields.  */
 706   elem->file = ldfile->fname;
 707   elem->line = ldfile->lineno;
 708
 709   elem->last = collate->cursor;
 710   elem->next = collate->cursor ? collate->cursor->next : NULL;
 711   if (collate->cursor != NULL && collate->cursor->next != NULL)
 712     collate->cursor->next->last = elem;
 713   if (collate->cursor != NULL)
 714     collate->cursor->next = elem;
 715   if (collate->start == NULL)
 716     {
 717       assert (collate->cursor == NULL);
 718       collate->start = elem;
 719     }
 720
 721   elem->section = collate->current_section;
 722
 723   if (collate->current_section->first == NULL)
 724     collate->current_section->first = elem;
 725   if (collate->current_section->last == collate->cursor)
 726     collate->current_section->last = elem;
 727
 728   collate->cursor = elem;
 729
 730   elem->weights = (struct element_list_t *)
 731     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 732   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 733
 734   weight_cnt = 0;
 735
 736   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 737   do
 738     {
 739       if (arg->tok == tok_eof || arg->tok == tok_eol)
 740         break;
 741
 742       if (arg->tok == tok_ignore)
 743         {
 744           /* The weight for this level has to be ignored.  We use the
 745              null pointer to indicate this.  */
 746           elem->weights[weight_cnt].w = (struct element_t **)
 747             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 748           elem->weights[weight_cnt].w[0] = NULL;
 749           elem->weights[weight_cnt].cnt = 1;
 750         }
 751       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 752         {
 753           char ucs4str[10];
 754           struct element_t *val;
 755           char *symstr;
 756           size_t symlen;
 757
 758           if (arg->tok == tok_bsymbol)
 759             {
 760               symstr = arg->val.str.startmb;
 761               symlen = arg->val.str.lenmb;
 762             }
 763           else
 764             {
 765               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 766               symstr = ucs4str;
 767               symlen = 9;
 768             }
 769
 770           val = find_element (ldfile, collate, symstr, symlen);
 771           if (val == NULL)
 772             break;
 773
 774           elem->weights[weight_cnt].w = (struct element_t **)
 775             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 776           elem->weights[weight_cnt].w[0] = val;
 777           elem->weights[weight_cnt].cnt = 1;
 778         }
 779       else if (arg->tok == tok_string)
 780         {
 781           /* Split the string up in the individual characters and put
 782              the element definitions in the list.  */
 783           const char *cp = arg->val.str.startmb;
 784           int cnt = 0;
 785           struct element_t *charelem;
 786           struct element_t **weights = NULL;
 787           int max = 0;
 788
 789           if (*cp == '\0')
 790             {
 791               lr_error (ldfile, _("%s: empty weight string not allowed"),
 792                         "LC_COLLATE");
 793               lr_ignore_rest (ldfile, 0);
 794               break;
 795             }
 796
 797           do
 798             {
 799               if (*cp == '<')
 800                 {
 801                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 802                      the latter we have to unify the name.  */
 803                   const char *startp = ++cp;
 804                   size_t len;
 805
 806                   while (*cp != '>')
 807                     {
 808                       if (*cp == ldfile->escape_char)
 809                         ++cp;
 810                       if (*cp == '\0')
 811                         /* It's a syntax error.  */
 812                         goto syntax;
 813
 814                       ++cp;
 815                     }
 816
 817                   if (cp - startp == 5 && startp[0] == 'U'
 818                       && isxdigit (startp[1]) && isxdigit (startp[2])
 819                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 820                     {
 821                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 822                       char *newstr;
 823
 824                       newstr = (char *) xmalloc (10);
 825                       snprintf (newstr, 10, "U%08X", ucs4);
 826                       startp = newstr;
 827
 828                       len = 9;
 829                     }
 830                   else
 831                     len = cp - startp;
 832
 833                   charelem = find_element (ldfile, collate, startp, len);
 834                   ++cp;
 835                 }
 836               else
 837                 {
 838                   /* People really shouldn't use characters directly in
 839                      the string.  Especially since it's not really clear
 840                      what this means.  We interpret all characters in the
 841                      string as if that would be bsymbols.  Otherwise we
 842                      would have to match back to bsymbols somehow and this
 843                      is normally not what people normally expect.  */
 844                   charelem = find_element (ldfile, collate, cp++, 1);
 845                 }
 846
 847               if (charelem == NULL)
 848                 {
 849                   /* We ignore the rest of the line.  */
 850                   lr_ignore_rest (ldfile, 0);
 851                   break;
 852                 }
 853
 854               /* Add the pointer.  */
 855               if (cnt >= max)
 856                 {
 857                   struct element_t **newp;
 858                   max += 10;
 859                   newp = (struct element_t **)
 860                     alloca (max * sizeof (struct element_t *));
 861                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 862                   weights = newp;
 863                 }
 864               weights[cnt++] = charelem;
 865             }
 866           while (*cp != '\0');
 867
 868           /* Now store the information.  */
 869           elem->weights[weight_cnt].w = (struct element_t **)
 870             obstack_alloc (&collate->mempool,
 871                            cnt * sizeof (struct element_t *));
 872           memcpy (elem->weights[weight_cnt].w, weights,
 873                   cnt * sizeof (struct element_t *));
 874           elem->weights[weight_cnt].cnt = cnt;
 875
 876           /* We don't need the string anymore.  */
 877           free (arg->val.str.startmb);
 878         }
 879       else if (ellipsis != tok_none
 880                && (arg->tok == tok_ellipsis2
 881                    || arg->tok == tok_ellipsis3
 882                    || arg->tok == tok_ellipsis4))
 883         {
 884           /* It must be the same ellipsis as used in the initial column.  */
 885           if (arg->tok != ellipsis)
 886             lr_error (ldfile, _("\
 887 %s: weights must use the same ellipsis symbol as the name"),
 888                       "LC_COLLATE");
 889
 890           /* The weight for this level will depend on the element
 891              iterating over the range.  Put a placeholder.  */
 892           elem->weights[weight_cnt].w = (struct element_t **)
 893             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 894           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 895           elem->weights[weight_cnt].cnt = 1;
 896         }
 897       else
 898         {
 899         syntax:
 900           /* It's a syntax error.  */
 901           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 902           lr_ignore_rest (ldfile, 0);
 903           break;
 904         }
 905
 906       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 907       /* This better should be the end of the line or a semicolon.  */
 908       if (arg->tok == tok_semicolon)
 909         /* OK, ignore this and read the next token.  */
 910         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 911       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 912         {
 913           /* It's a syntax error.  */
 914           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 915           lr_ignore_rest (ldfile, 0);
 916           break;
 917         }
 918     }
 919   while (++weight_cnt < nrules);
 920
 921   if (weight_cnt < nrules)
 922     {
 923       /* This means the rest of the line uses the current element as
 924          the weight.  */
 925       do
 926         {
 927           elem->weights[weight_cnt].w = (struct element_t **)
 928             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 929           if (ellipsis == tok_none)
 930             elem->weights[weight_cnt].w[0] = elem;
 931           else
 932             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 933           elem->weights[weight_cnt].cnt = 1;
 934         }
 935       while (++weight_cnt < nrules);
 936     }
 937   else
 938     {
 939       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 940         {
 941           /* Too many rule values.  */
 942           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 943           lr_ignore_rest (ldfile, 0);
 944         }
 945       else
 946         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 947     }
 948 }
 949
 950
 951 static int
 952 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 953               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 954               struct localedef_t *result)
 955 {
 956   /* First find out what kind of symbol this is.  */
 957   struct charseq *seq;
 958   uint32_t wc;
 959   struct element_t *elem = NULL;
 960   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 961
 962   /* Try to find the character in the charmap.  */
 963   seq = charmap_find_value (charmap, symstr, symlen);
 964
 965   /* Determine the wide character.  */
 966   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 967     {
 968       wc = repertoire_find_value (repertoire, symstr, symlen);
 969       if (seq != NULL)
 970         seq->ucs4 = wc;
 971     }
 972   else
 973     wc = seq->ucs4;
 974
 975   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 976     {
 977       /* It's no character, so look through the collation elements and
 978          symbol list.  */
 979       void *ptr = elem;
 980       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 981         {
 982           void *result;
 983           struct symbol_t *sym = NULL;
 984
 985           /* It's also collation element.  Therefore it's either a
 986              collating symbol or it's a character which is not
 987              supported by the character set.  In the later case we
 988              simply create a dummy entry.  */
 989           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 990             {
 991               /* It's a collation symbol.  */
 992               sym = (struct symbol_t *) result;
 993
 994               elem = sym->order;
 995             }
 996
 997           if (elem == NULL)
 998             {
 999               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
1000
1001               if (sym != NULL)
1002                 sym->order = elem;
1003               else
1004                 /* Enter a fake element in the sequence table.  This
1005                    won't cause anything in the output since there is
1006                    no multibyte or wide character associated with
1007                    it.  */
1008                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1009             }
1010         }
1011       else
1012         /* Copy the result back.  */
1013         elem = ptr;
1014     }
1015   else
1016     {
1017       /* Otherwise the symbols stands for a character.  */
1018       void *ptr = elem;
1019       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1020         {
1021           uint32_t wcs[2] = { wc, 0 };
1022
1023           /* We have to allocate an entry.  */
1024           elem = new_element (collate,
1025                               seq != NULL ? (char *) seq->bytes : NULL,
1026                               seq != NULL ? seq->nbytes : 0,
1027                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1028                               symstr, symlen, 1);
1029
1030           /* And add it to the table.  */
1031           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1032             /* This cannot happen.  */
1033             assert (! "Internal error");
1034         }
1035       else
1036         {
1037           /* Copy the result back.  */
1038           elem = ptr;
1039
1040           /* Maybe the character was used before the definition.  In this case
1041              we have to insert the byte sequences now.  */
1042           if (elem->mbs == NULL && seq != NULL)
1043             {
1044               elem->mbs = obstack_copy0 (&collate->mempool,
1045                                          seq->bytes, seq->nbytes);
1046               elem->nmbs = seq->nbytes;
1047             }
1048
1049           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1050             {
1051               uint32_t wcs[2] = { wc, 0 };
1052
1053               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1054               elem->nwcs = 1;
1055             }
1056         }
1057     }
1058
1059   /* Test whether this element is not already in the list.  */
1060   if (elem->next != NULL || elem == collate->cursor)
1061     {
1062       lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
1063                 (int) symlen, symstr, elem->file, elem->line);
1064       lr_ignore_rest (ldfile, 0);
1065       return 1;
1066     }
1067
1068   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1069
1070   return 0;
1071 }
1072
1073
1074 static void
1075 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1076                  enum token_t ellipsis, const struct charmap_t *charmap,
1077                  struct repertoire_t *repertoire,
1078                  struct localedef_t *result)
1079 {
1080   struct element_t *startp;
1081   struct element_t *endp;
1082   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1083
1084   /* Unlink the entry added for the ellipsis.  */
1085   unlink_element (collate);
1086   startp = collate->cursor;
1087
1088   /* Process and add the end-entry.  */
1089   if (symstr != NULL
1090       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1091     /* Something went wrong with inserting the to-value.  This means
1092        we cannot process the ellipsis.  */
1093     return;
1094
1095   /* Reset the cursor.  */
1096   collate->cursor = startp;
1097
1098   /* Now we have to handle many different situations:
1099      - we have to distinguish between the three different ellipsis forms
1100      - the is the ellipsis at the beginning, in the middle, or at the end.
1101   */
1102   endp = collate->cursor->next;
1103   assert (symstr == NULL || endp != NULL);
1104
1105   /* XXX The following is probably very wrong since also collating symbols
1106      can appear in ranges.  But do we want/can refine the test for that?  */
1107 #if 0
1108   /* Both, the start and the end symbol, must stand for characters.  */
1109   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1110       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1111     {
1112       lr_error (ldfile, _("\
1113 %s: the start and the end symbol of a range must stand for characters"),
1114                 "LC_COLLATE");
1115       return;
1116     }
1117 #endif
1118
1119   if (ellipsis == tok_ellipsis3)
1120     {
1121       /* One requirement we make here: the length of the byte
1122          sequences for the first and end character must be the same.
1123          This is mainly to prevent unwanted effects and this is often
1124          not what is wanted.  */
1125       size_t len = (startp->mbs != NULL ? startp->nmbs
1126                     : (endp->mbs != NULL ? endp->nmbs : 0));
1127       char mbcnt[len + 1];
1128       char mbend[len + 1];
1129
1130       /* Well, this should be caught somewhere else already.  Just to
1131          make sure.  */
1132       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1133       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1134
1135       if (startp != NULL && endp != NULL
1136           && startp->mbs != NULL && endp->mbs != NULL
1137           && startp->nmbs != endp->nmbs)
1138         {
1139           lr_error (ldfile, _("\
1140 %s: byte sequences of first and last character must have the same length"),
1141                     "LC_COLLATE");
1142           return;
1143         }
1144
1145       /* Determine whether we have to generate multibyte sequences.  */
1146       if ((startp == NULL || startp->mbs != NULL)
1147           && (endp == NULL || endp->mbs != NULL))
1148         {
1149           int cnt;
1150           int ret;
1151
1152           /* Prepare the beginning byte sequence.  This is either from the
1153              beginning byte sequence or it is all nulls if it was an
1154              initial ellipsis.  */
1155           if (startp == NULL || startp->mbs == NULL)
1156             memset (mbcnt, '\0', len);
1157           else
1158             {
1159               memcpy (mbcnt, startp->mbs, len);
1160
1161               /* And increment it so that the value is the first one we will
1162                  try to insert.  */
1163               for (cnt = len - 1; cnt >= 0; --cnt)
1164                 if (++mbcnt[cnt] != '\0')
1165                   break;
1166             }
1167           mbcnt[len] = '\0';
1168
1169           /* And the end sequence.  */
1170           if (endp == NULL || endp->mbs == NULL)
1171             memset (mbend, '\0', len);
1172           else
1173             memcpy (mbend, endp->mbs, len);
1174           mbend[len] = '\0';
1175
1176           /* Test whether we have a correct range.  */
1177           ret = memcmp (mbcnt, mbend, len);
1178           if (ret >= 0)
1179             {
1180               if (ret > 0)
1181                 lr_error (ldfile, _("%s: byte sequence of first character of \
1182 range is not lower than that of the last character"), "LC_COLLATE");
1183               return;
1184             }
1185
1186           /* Generate the byte sequences data.  */
1187           while (1)
1188             {
1189               struct charseq *seq;
1190
1191               /* Quite a bit of work ahead.  We have to find the character
1192                  definition for the byte sequence and then determine the
1193                  wide character belonging to it.  */
1194               seq = charmap_find_symbol (charmap, mbcnt, len);
1195               if (seq != NULL)
1196                 {
1197                   struct element_t *elem;
1198                   size_t namelen;
1199
1200                   /* I don't think this can ever happen.  */
1201                   assert (seq->name != NULL);
1202                   namelen = strlen (seq->name);
1203
1204                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1205                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1206                                                        namelen);
1207
1208                   /* Now we are ready to insert the new value in the
1209                      sequence.  Find out whether the element is
1210                      already known.  */
1211                   void *ptr;
1212                   if (find_entry (&collate->seq_table, seq->name, namelen,
1213                                   &ptr) != 0)
1214                     {
1215                       uint32_t wcs[2] = { seq->ucs4, 0 };
1216
1217                       /* We have to allocate an entry.  */
1218                       elem = new_element (collate, mbcnt, len,
1219                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1220                                           ? NULL : wcs, seq->name,
1221                                           namelen, 1);
1222
1223                       /* And add it to the table.  */
1224                       if (insert_entry (&collate->seq_table, seq->name,
1225                                         namelen, elem) != 0)
1226                         /* This cannot happen.  */
1227                         assert (! "Internal error");
1228                     }
1229                   else
1230                     /* Copy the result.  */
1231                     elem = ptr;
1232
1233                   /* Test whether this element is not already in the list.  */
1234                   if (elem->next != NULL || (collate->cursor != NULL
1235                                              && elem->next == collate->cursor))
1236                     {
1237                       lr_error (ldfile, _("\
1238 order for `%.*s' already defined at %s:%zu"),
1239                                 (int) namelen, seq->name,
1240                                 elem->file, elem->line);
1241                       goto increment;
1242                     }
1243
1244                   /* Enqueue the new element.  */
1245                   elem->last = collate->cursor;
1246                   if (collate->cursor == NULL)
1247                     elem->next = NULL;
1248                   else
1249                     {
1250                       elem->next = collate->cursor->next;
1251                       elem->last->next = elem;
1252                       if (elem->next != NULL)
1253                         elem->next->last = elem;
1254                     }
1255                   if (collate->start == NULL)
1256                     {
1257                       assert (collate->cursor == NULL);
1258                       collate->start = elem;
1259                     }
1260                   collate->cursor = elem;
1261
1262                  /* Add the weight value.  We take them from the
1263                     `ellipsis_weights' member of `collate'.  */
1264                   elem->weights = (struct element_list_t *)
1265                     obstack_alloc (&collate->mempool,
1266                                    nrules * sizeof (struct element_list_t));
1267                   for (cnt = 0; cnt < nrules; ++cnt)
1268                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1269                         && (collate->ellipsis_weight.weights[cnt].w[0]
1270                             == ELEMENT_ELLIPSIS2))
1271                       {
1272                         elem->weights[cnt].w = (struct element_t **)
1273                           obstack_alloc (&collate->mempool,
1274                                          sizeof (struct element_t *));
1275                         elem->weights[cnt].w[0] = elem;
1276                         elem->weights[cnt].cnt = 1;
1277                       }
1278                     else
1279                       {
1280                         /* Simply use the weight from `ellipsis_weight'.  */
1281                         elem->weights[cnt].w =
1282                           collate->ellipsis_weight.weights[cnt].w;
1283                         elem->weights[cnt].cnt =
1284                           collate->ellipsis_weight.weights[cnt].cnt;
1285                       }
1286                 }
1287
1288               /* Increment for the next round.  */
1289             increment:
1290               for (cnt = len - 1; cnt >= 0; --cnt)
1291                 if (++mbcnt[cnt] != '\0')
1292                   break;
1293
1294               /* Find out whether this was all.  */
1295               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1296                 /* Yep, that's all.  */
1297                 break;
1298             }
1299         }
1300     }
1301   else
1302     {
1303       /* For symbolic range we naturally must have a beginning and an
1304          end specified by the user.  */
1305       if (startp == NULL)
1306         lr_error (ldfile, _("\
1307 %s: symbolic range ellipsis must not directly follow `order_start'"),
1308                   "LC_COLLATE");
1309       else if (endp == NULL)
1310         lr_error (ldfile, _("\
1311 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312                   "LC_COLLATE");
1313       else
1314         {
1315           /* Determine the range.  To do so we have to determine the
1316              common prefix of the both names and then the numeric
1317              values of both ends.  */
1318           size_t lenfrom = strlen (startp->name);
1319           size_t lento = strlen (endp->name);
1320           char buf[lento + 1];
1321           int preflen = 0;
1322           long int from;
1323           long int to;
1324           char *cp;
1325           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1326
1327           if (lenfrom != lento)
1328             {
1329             invalid_range:
1330               lr_error (ldfile, _("\
1331 `%s' and `%.*s' are not valid names for symbolic range"),
1332                         startp->name, (int) lento, endp->name);
1333               return;
1334             }
1335
1336           while (startp->name[preflen] == endp->name[preflen])
1337             if (startp->name[preflen] == '\0')
1338               /* Nothing to be done.  The start and end point are identical
1339                  and while inserting the end point we have already given
1340                  the user an error message.  */
1341               return;
1342             else
1343               ++preflen;
1344
1345           errno = 0;
1346           from = strtol (startp->name + preflen, &cp, base);
1347           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1348             goto invalid_range;
1349
1350           errno = 0;
1351           to = strtol (endp->name + preflen, &cp, base);
1352           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1353             goto invalid_range;
1354
1355           /* Copy the prefix.  */
1356           memcpy (buf, startp->name, preflen);
1357
1358           /* Loop over all values.  */
1359           for (++from; from < to; ++from)
1360             {
1361               struct element_t *elem = NULL;
1362               struct charseq *seq;
1363               uint32_t wc;
1364               int cnt;
1365
1366               /* Generate the name.  */
1367               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1368                        (int) (lenfrom - preflen), from);
1369
1370               /* Look whether this name is already defined.  */
1371               void *ptr;
1372               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1373                 {
1374                   /* Copy back the result.  */
1375                   elem = ptr;
1376
1377                   if (elem->next != NULL || (collate->cursor != NULL
1378                                              && elem->next == collate->cursor))
1379                     {
1380                       lr_error (ldfile, _("\
1381 %s: order for `%.*s' already defined at %s:%zu"),
1382                                 "LC_COLLATE", (int) lenfrom, buf,
1383                                 elem->file, elem->line);
1384                       continue;
1385                     }
1386
1387                   if (elem->name == NULL)
1388                     {
1389                       lr_error (ldfile, _("%s: `%s' must be a character"),
1390                                 "LC_COLLATE", buf);
1391                       continue;
1392                     }
1393                 }
1394
1395               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1396                 {
1397                   /* Search for a character of this name.  */
1398                   seq = charmap_find_value (charmap, buf, lenfrom);
1399                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1400                     {
1401                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1402
1403                       if (seq != NULL)
1404                         seq->ucs4 = wc;
1405                     }
1406                   else
1407                     wc = seq->ucs4;
1408
1409                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1410                     /* We don't know anything about a character with this
1411                        name.  XXX Should we warn?  */
1412                     continue;
1413
1414                   if (elem == NULL)
1415                     {
1416                       uint32_t wcs[2] = { wc, 0 };
1417
1418                       /* We have to allocate an entry.  */
1419                       elem = new_element (collate,
1420                                           seq != NULL
1421                                           ? (char *) seq->bytes : NULL,
1422                                           seq != NULL ? seq->nbytes : 0,
1423                                           wc == ILLEGAL_CHAR_VALUE
1424                                           ? NULL : wcs, buf, lenfrom, 1);
1425                     }
1426                   else
1427                     {
1428                       /* Update the element.  */
1429                       if (seq != NULL)
1430                         {
1431                           elem->mbs = obstack_copy0 (&collate->mempool,
1432                                                      seq->bytes, seq->nbytes);
1433                           elem->nmbs = seq->nbytes;
1434                         }
1435
1436                       if (wc != ILLEGAL_CHAR_VALUE)
1437                         {
1438                           uint32_t zero = 0;
1439
1440                           obstack_grow (&collate->mempool,
1441                                         &wc, sizeof (uint32_t));
1442                           obstack_grow (&collate->mempool,
1443                                         &zero, sizeof (uint32_t));
1444                           elem->wcs = obstack_finish (&collate->mempool);
1445                           elem->nwcs = 1;
1446                         }
1447                     }
1448
1449                   elem->file = ldfile->fname;
1450                   elem->line = ldfile->lineno;
1451                   elem->section = collate->current_section;
1452                 }
1453
1454               /* Enqueue the new element.  */
1455               elem->last = collate->cursor;
1456               elem->next = collate->cursor->next;
1457               elem->last->next = elem;
1458               if (elem->next != NULL)
1459                 elem->next->last = elem;
1460               collate->cursor = elem;
1461
1462               /* Now add the weights.  They come from the `ellipsis_weights'
1463                  member of `collate'.  */
1464               elem->weights = (struct element_list_t *)
1465                 obstack_alloc (&collate->mempool,
1466                                nrules * sizeof (struct element_list_t));
1467               for (cnt = 0; cnt < nrules; ++cnt)
1468                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1469                     && (collate->ellipsis_weight.weights[cnt].w[0]
1470                         == ELEMENT_ELLIPSIS2))
1471                   {
1472                     elem->weights[cnt].w = (struct element_t **)
1473                       obstack_alloc (&collate->mempool,
1474                                      sizeof (struct element_t *));
1475                     elem->weights[cnt].w[0] = elem;
1476                     elem->weights[cnt].cnt = 1;
1477                   }
1478                 else
1479                   {
1480                     /* Simly use the weight from `ellipsis_weight'.  */
1481                     elem->weights[cnt].w =
1482                       collate->ellipsis_weight.weights[cnt].w;
1483                     elem->weights[cnt].cnt =
1484                       collate->ellipsis_weight.weights[cnt].cnt;
1485                   }
1486             }
1487         }
1488     }
1489   /* Move the cursor to the last entry in the ellipsis.
1490      Subsequent operations need to start from the last entry.  */
1491   collate->cursor = endp;
1492 }
1493
1494
1495 static void
1496 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1497                  struct localedef_t *copy_locale, int ignore_content)
1498 {
1499   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1500     {
1501       struct locale_collate_t *collate;
1502
1503       if (copy_locale == NULL)
1504         {
1505           collate = locale->categories[LC_COLLATE].collate =
1506             (struct locale_collate_t *)
1507             xcalloc (1, sizeof (struct locale_collate_t));
1508
1509           /* Init the various data structures.  */
1510           init_hash (&collate->elem_table, 100);
1511           init_hash (&collate->sym_table, 100);
1512           init_hash (&collate->seq_table, 500);
1513           obstack_init (&collate->mempool);
1514
1515           collate->col_weight_max = -1;
1516           collate->codepoint_collation = false;
1517         }
1518       else
1519         /* Reuse the copy_locale's data structures.  */
1520         collate = locale->categories[LC_COLLATE].collate =
1521           copy_locale->categories[LC_COLLATE].collate;
1522     }
1523
1524   ldfile->translate_strings = 0;
1525   ldfile->return_widestr = 0;
1526 }
1527
1528
1529 void
1530 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1531 {
1532   /* Now is the time when we can assign the individual collation
1533      values for all the symbols.  We have possibly different values
1534      for the wide- and the multibyte-character symbols.  This is done
1535      since it might make a difference in the encoding if there is in
1536      some cases no multibyte-character but there are wide-characters.
1537      (The other way around it is not important since theencoded
1538      collation value in the wide-character case is 32 bits wide and
1539      therefore requires no encoding).
1540
1541      The lowest collation value assigned is 2.  Zero is reserved for
1542      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1543      functions and 1 is used to separate the individual passes for the
1544      different rules.
1545
1546      We also have to construct is list with all the bytes/words which
1547      can come first in a sequence, followed by all the elements which
1548      also start with this byte/word.  The order is reverse which has
1549      among others the important effect that longer strings are located
1550      first in the list.  This is required for the output data since
1551      the algorithm used in `strcoll' etc depends on this.
1552
1553      The multibyte case is easy.  We simply sort into an array with
1554      256 elements.  */
1555   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1556   int mbact[nrules];
1557   int wcact;
1558   int mbseqact;
1559   int wcseqact;
1560   struct element_t *runp;
1561   int i;
1562   int need_undefined = 0;
1563   struct section_list *sect;
1564   int ruleidx;
1565
1566   if (collate == NULL)
1567     {
1568       /* No data, no check. Issue a warning.  */
1569       record_warning (_("No definition for %s category found"),
1570                       "LC_COLLATE");
1571       return;
1572     }
1573
1574   /* No data required.  */
1575   if (collate->codepoint_collation)
1576     return;
1577
1578   /* If this assertion is hit change the type in `element_t'.  */
1579   assert (nrules <= sizeof (runp->used_in_level) * 8);
1580
1581   /* Make sure that the `position' rule is used either in all sections
1582      or in none.  */
1583   for (i = 0; i < nrules; ++i)
1584     for (sect = collate->sections; sect != NULL; sect = sect->next)
1585       if (sect != collate->current_section
1586           && sect->rules != NULL
1587           && ((sect->rules[i] & sort_position)
1588               != (collate->current_section->rules[i] & sort_position)))
1589         {
1590           record_error (0, 0, _("\
1591 %s: `position' must be used for a specific level in all sections or none"),
1592                         "LC_COLLATE");
1593           break;
1594         }
1595
1596   /* Find out which elements are used at which level.  At the same
1597      time we find out whether we have any undefined symbols.  */
1598   runp = collate->start;
1599   while (runp != NULL)
1600     {
1601       if (runp->mbs != NULL)
1602         {
1603           for (i = 0; i < nrules; ++i)
1604             {
1605               int j;
1606
1607               for (j = 0; j < runp->weights[i].cnt; ++j)
1608                 /* A NULL pointer as the weight means IGNORE.  */
1609                 if (runp->weights[i].w[j] != NULL)
1610                   {
1611                     if (runp->weights[i].w[j]->weights == NULL)
1612                       {
1613                         record_error_at_line (0, 0, runp->file, runp->line,
1614                                               _("symbol `%s' not defined"),
1615                                               runp->weights[i].w[j]->name);
1616
1617                         need_undefined = 1;
1618                         runp->weights[i].w[j] = &collate->undefined;
1619                       }
1620                     else
1621                       /* Set the bit for the level.  */
1622                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1623                   }
1624             }
1625         }
1626
1627       /* Up to the next entry.  */
1628       runp = runp->next;
1629     }
1630
1631   /* Walk through the list of defined sequences and assign weights.  Also
1632      create the data structure which will allow generating the single byte
1633      character based tables.
1634
1635      Since at each time only the weights for each of the rules are
1636      only compared to other weights for this rule it is possible to
1637      assign more compact weight values than simply counting all
1638      weights in sequence.  We can assign weights from 3, one for each
1639      rule individually and only for those elements, which are actually
1640      used for this rule.
1641
1642      Why is this important?  It is not for the wide char table.  But
1643      it is for the singlebyte output since here larger numbers have to
1644      be encoded to make it possible to emit the value as a byte
1645      string.  */
1646   for (i = 0; i < nrules; ++i)
1647     mbact[i] = 2;
1648   wcact = 2;
1649   mbseqact = 0;
1650   wcseqact = 0;
1651   runp = collate->start;
1652   while (runp != NULL)
1653     {
1654       /* Determine the order.  */
1655       if (runp->used_in_level != 0)
1656         {
1657           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1658                                                  nrules * sizeof (int));
1659
1660           for (i = 0; i < nrules; ++i)
1661             if ((runp->used_in_level & (1 << i)) != 0)
1662               runp->mborder[i] = mbact[i]++;
1663             else
1664               runp->mborder[i] = 0;
1665         }
1666
1667       if (runp->mbs != NULL)
1668         {
1669           struct element_t **eptr;
1670           struct element_t *lastp = NULL;
1671
1672           /* Find the point where to insert in the list.  */
1673           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1674           while (*eptr != NULL)
1675             {
1676               if ((*eptr)->nmbs < runp->nmbs)
1677                 break;
1678
1679               if ((*eptr)->nmbs == runp->nmbs)
1680                 {
1681                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1682
1683                   if (c == 0)
1684                     {
1685                       /* This should not happen.  It means that we have
1686                          to symbols with the same byte sequence.  It is
1687                          of course an error.  */
1688                       record_error_at_line (0, 0, (*eptr)->file,
1689                                             (*eptr)->line,
1690                                             _("\
1691 symbol `%s' has the same encoding as"), (*eptr)->name);
1692
1693                       record_error_at_line (0, 0, runp->file, runp->line,
1694                                             _("symbol `%s'"), runp->name);
1695                       goto dont_insert;
1696                     }
1697                   else if (c < 0)
1698                     /* Insert it here.  */
1699                     break;
1700                 }
1701
1702               /* To the next entry.  */
1703               lastp = *eptr;
1704               eptr = &(*eptr)->mbnext;
1705             }
1706
1707           /* Set the pointers.  */
1708           runp->mbnext = *eptr;
1709           runp->mblast = lastp;
1710           if (*eptr != NULL)
1711             (*eptr)->mblast = runp;
1712           *eptr = runp;
1713         dont_insert:
1714           ;
1715         }
1716
1717       if (runp->used_in_level)
1718         runp->wcorder = wcact++;
1719
1720       if (runp->is_character)
1721         {
1722           if (runp->nmbs == 1)
1723             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1724
1725           runp->wcseqorder = wcseqact++;
1726         }
1727       else if (runp->mbs != NULL && runp->weights != NULL)
1728         /* This is for collation elements.  */
1729         runp->wcseqorder = wcseqact++;
1730
1731       /* Up to the next entry.  */
1732       runp = runp->next;
1733     }
1734
1735   /* Find out whether any of the `mbheads' entries is unset.  In this
1736      case we use the UNDEFINED entry.  */
1737   for (i = 1; i < 256; ++i)
1738     if (collate->mbheads[i] == NULL)
1739       {
1740         need_undefined = 1;
1741         collate->mbheads[i] = &collate->undefined;
1742       }
1743
1744   /* Now to the wide character case.  */
1745   collate->wcheads.p = 6;
1746   collate->wcheads.q = 10;
1747   wchead_table_init (&collate->wcheads);
1748
1749   collate->wcseqorder.p = 6;
1750   collate->wcseqorder.q = 10;
1751   collseq_table_init (&collate->wcseqorder);
1752
1753   /* Start adding.  */
1754   runp = collate->start;
1755   while (runp != NULL)
1756     {
1757       if (runp->wcs != NULL)
1758         {
1759           struct element_t *e;
1760           struct element_t **eptr;
1761           struct element_t *lastp;
1762
1763           /* Insert the collation sequence value.  */
1764           if (runp->is_character)
1765             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1766                                runp->wcseqorder);
1767
1768           /* Find the point where to insert in the list.  */
1769           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1770           eptr = &e;
1771           lastp = NULL;
1772           while (*eptr != NULL)
1773             {
1774               if ((*eptr)->nwcs < runp->nwcs)
1775                 break;
1776
1777               if ((*eptr)->nwcs == runp->nwcs)
1778                 {
1779                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1780                                    (wchar_t *) runp->wcs, runp->nwcs);
1781
1782                   if (c == 0)
1783                     {
1784                       /* This should not happen.  It means that we have
1785                          two symbols with the same byte sequence.  It is
1786                          of course an error.  */
1787                       record_error_at_line (0, 0, (*eptr)->file,
1788                                             (*eptr)->line,
1789                                             _("\
1790 symbol `%s' has the same encoding as"), (*eptr)->name);
1791
1792                       record_error_at_line (0, 0, runp->file, runp->line,
1793                                             _("symbol `%s'"), runp->name);
1794                       goto dont_insertwc;
1795                     }
1796                   else if (c < 0)
1797                     /* Insert it here.  */
1798                     break;
1799                 }
1800
1801               /* To the next entry.  */
1802               lastp = *eptr;
1803               eptr = &(*eptr)->wcnext;
1804             }
1805
1806           /* Set the pointers.  */
1807           runp->wcnext = *eptr;
1808           runp->wclast = lastp;
1809           if (*eptr != NULL)
1810             (*eptr)->wclast = runp;
1811           *eptr = runp;
1812           if (eptr == &e)
1813             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1814         dont_insertwc:
1815           ;
1816         }
1817
1818       /* Up to the next entry.  */
1819       runp = runp->next;
1820     }
1821
1822   /* Now determine whether the UNDEFINED entry is needed and if yes,
1823      whether it was defined.  */
1824   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1825   if (collate->undefined.file == NULL)
1826     {
1827       if (need_undefined)
1828         {
1829           /* This seems not to be enforced by recent standards.  Don't
1830              emit an error, simply append UNDEFINED at the end.  */
1831           collate->undefined.mborder =
1832             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1833
1834           for (i = 0; i < nrules; ++i)
1835             collate->undefined.mborder[i] = mbact[i]++;
1836         }
1837
1838       /* In any case we will need the definition for the wide character
1839          case.  But we will not complain that it is missing since the
1840          specification strangely enough does not seem to account for
1841          this.  */
1842       collate->undefined.wcorder = wcact++;
1843     }
1844
1845   /* Finally, try to unify the rules for the sections.  Whenever the rules
1846      for a section are the same as those for another section give the
1847      ruleset the same index.  Since there are never many section we can
1848      use an O(n^2) algorithm here.  */
1849   sect = collate->sections;
1850   while (sect != NULL && sect->rules == NULL)
1851     sect = sect->next;
1852
1853   /* Bail out if we have no sections because of earlier errors.  */
1854   if (sect == NULL)
1855     {
1856       record_error (EXIT_FAILURE, 0, _("too many errors; giving up"));
1857       return;
1858     }
1859
1860   ruleidx = 0;
1861   do
1862     {
1863       struct section_list *osect = collate->sections;
1864
1865       while (osect != sect)
1866         if (osect->rules != NULL
1867             && memcmp (osect->rules, sect->rules,
1868                        nrules * sizeof (osect->rules[0])) == 0)
1869           break;
1870         else
1871           osect = osect->next;
1872
1873       if (osect == sect)
1874         sect->ruleidx = ruleidx++;
1875       else
1876         sect->ruleidx = osect->ruleidx;
1877
1878       /* Next section.  */
1879       do
1880         sect = sect->next;
1881       while (sect != NULL && sect->rules == NULL);
1882     }
1883   while (sect != NULL);
1884   /* We are currently not prepared for more than 128 rulesets.  But this
1885      should never really be a problem.  */
1886   assert (ruleidx <= 128);
1887 }
1888
1889
1890 static int32_t
1891 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1892                struct element_t *elem)
1893 {
1894   size_t cnt;
1895   int32_t retval;
1896
1897   /* Optimize the use of UNDEFINED.  */
1898   if (elem == &collate->undefined)
1899     /* The weights are already inserted.  */
1900     return 0;
1901
1902   /* This byte can start exactly one collation element and this is
1903      a single byte.  We can directly give the index to the weights.  */
1904   retval = obstack_object_size (pool);
1905
1906   /* Construct the weight.  */
1907   for (cnt = 0; cnt < nrules; ++cnt)
1908     {
1909       char buf[elem->weights[cnt].cnt * 7];
1910       int len = 0;
1911       int i;
1912
1913       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1914         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1915         if (elem->weights[cnt].w[i] != NULL)
1916           len += utf8_encode (&buf[len],
1917                               elem->weights[cnt].w[i]->mborder[cnt]);
1918
1919       /* And add the buffer content.  */
1920       obstack_1grow (pool, len);
1921       obstack_grow (pool, buf, len);
1922     }
1923
1924   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1925 }
1926
1927
1928 static int32_t
1929 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1930                  struct element_t *elem)
1931 {
1932   size_t cnt;
1933   int32_t retval;
1934
1935   /* Optimize the use of UNDEFINED.  */
1936   if (elem == &collate->undefined)
1937     /* The weights are already inserted.  */
1938     return 0;
1939
1940   /* This byte can start exactly one collation element and this is
1941      a single byte.  We can directly give the index to the weights.  */
1942   retval = obstack_object_size (pool) / sizeof (int32_t);
1943
1944   /* Construct the weight.  */
1945   for (cnt = 0; cnt < nrules; ++cnt)
1946     {
1947       int32_t buf[elem->weights[cnt].cnt];
1948       int i;
1949       int32_t j;
1950
1951       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1952         if (elem->weights[cnt].w[i] != NULL)
1953           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1954
1955       /* And add the buffer content.  */
1956       obstack_int32_grow (pool, j);
1957
1958       obstack_grow (pool, buf, j * sizeof (int32_t));
1959       maybe_swap_uint32_obstack (pool, j);
1960     }
1961
1962   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1963 }
1964
1965 /* Context for add_to_tablewc.  That function is handed to
     wchead_table_iterate by collate_output and only receives (ch, runp),
     so everything else it needs is passed through this file-scope
     structure.  collate_output fills it in before iterating and clears
     it with memset afterwards.
     If localedef is ever threaded, this would need to be a __thread var.  */
1966 static struct
1967 {
       /* Pool handed to output_weightwc for the weight data.  */
1968   struct obstack *weightpool;
       /* Pool receiving the entry lists for characters which start more
          than one sequence or a sequence longer than one character.  */
1969   struct obstack *extrapool;
       /* Pool receiving the weight indices of compressed runs of
          consecutive sequences.  */
1970   struct obstack *indpool;
       /* Collation data, passed on to output_weightwc.  */
1971   struct locale_collate_t *collate;
       /* Table filled through collidx_table_add.  */
1972   struct collidx_table *tablewc;
1973 } atwc;
1974
1975 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1976
1977 static void
1978 add_to_tablewc (uint32_t ch, struct element_t *runp)
1979 {
1980   if (runp->wcnext == NULL && runp->nwcs == 1)
1981     {
1982       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1983                                            runp);
1984       collidx_table_add (atwc.tablewc, ch, weigthidx);
1985     }
1986   else
1987     {
1988       /* As for the singlebyte table, we recognize sequences and
1989          compress them.  */
1990
1991       collidx_table_add (atwc.tablewc, ch,
1992                          -(obstack_object_size (atwc.extrapool)
1993                          / sizeof (uint32_t)));
1994
1995       do
1996         {
1997           /* Store the current index in the weight table.  We know that
1998              the current position in the `extrapool' is aligned on a
1999              32-bit address.  */
2000           int32_t weightidx;
2001           int added;
2002
2003           /* Find out wether this is a single entry or we have more than
2004              one consecutive entry.  */
2005           if (runp->wcnext != NULL
2006               && runp->nwcs == runp->wcnext->nwcs
2007               && wmemcmp ((wchar_t *) runp->wcs,
2008                           (wchar_t *)runp->wcnext->wcs,
2009                           runp->nwcs - 1) == 0
2010               && (runp->wcs[runp->nwcs - 1]
2011                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2012             {
2013               int i;
2014               struct element_t *series_startp = runp;
2015               struct element_t *curp;
2016
2017               /* Now add first the initial byte sequence.  */
2018               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2019               if (sizeof (int32_t) == sizeof (int))
2020                 obstack_make_room (atwc.extrapool, added);
2021
2022               /* More than one consecutive entry.  We mark this by having
2023                  a negative index into the indirect table.  */
2024               obstack_int32_grow_fast (atwc.extrapool,
2025                                        -(obstack_object_size (atwc.indpool)
2026                                          / sizeof (int32_t)));
2027               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2028
2029               do
2030                 runp = runp->wcnext;
2031               while (runp->wcnext != NULL
2032                      && runp->nwcs == runp->wcnext->nwcs
2033                      && wmemcmp ((wchar_t *) runp->wcs,
2034                                  (wchar_t *)runp->wcnext->wcs,
2035                                  runp->nwcs - 1) == 0
2036                      && (runp->wcs[runp->nwcs - 1]
2037                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2038
2039               /* Now walk backward from here to the beginning.  */
2040               curp = runp;
2041
2042               for (i = 1; i < runp->nwcs; ++i)
2043                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2044
2045               /* Now find the end of the consecutive sequence and
2046                  add all the indices in the indirect pool.  */
2047               do
2048                 {
2049                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2050                                                curp);
2051                   obstack_int32_grow (atwc.indpool, weightidx);
2052
2053                   curp = curp->wclast;
2054                 }
2055               while (curp != series_startp);
2056
2057               /* Add the final weight.  */
2058               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2059                                            curp);
2060               obstack_int32_grow (atwc.indpool, weightidx);
2061
2062               /* And add the end byte sequence.  Without length this
2063                  time.  */
2064               for (i = 1; i < curp->nwcs; ++i)
2065                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2066             }
2067           else
2068             {
2069               /* A single entry.  Simply add the index and the length and
2070                  string (except for the first character which is already
2071                  tested for).  */
2072               int i;
2073
2074               /* Output the weight info.  */
2075               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2076                                            runp);
2077
2078               assert (runp->nwcs > 0);
2079               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2080               if (sizeof (int) == sizeof (int32_t))
2081                 obstack_make_room (atwc.extrapool, added);
2082
2083               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2084               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2085               for (i = 1; i < runp->nwcs; ++i)
2086                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087             }
2088
2089           /* Next entry.  */
2090           runp = runp->wcnext;
2091         }
2092       while (runp != NULL);
2093     }
2094 }
2095
2096 /* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
2097    _NL_COLLATE_COLLSEQWC.  */
2098 #include "C-collate-seq.c"
2099
/* Assemble the LC_COLLATE category of LOCALE and hand it to
   write_locale_data for OUTPUT_PATH.  CHARMAP is only consulted for its
   `code_set_name'.

   The first item is always the rule count `nrules'.  If LOCALE has no
   collation data, or uses codepoint collation, a reduced category is
   written (see below).  Otherwise the function emits, in this order:
   the rule set table, the 8-bit lookup table with its weight, extra and
   indirect pools, three empty items, the wide character lookup table
   with its three pools, the collation element name hash table with its
   string data, the two collation sequence tables and the code set
   name.  */
2100 void
2101 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2102                 const char *output_path)
2103 {
2104   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2105   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2106   struct locale_file file;
2107   size_t ch;
2108   int32_t tablemb[256];
2109   struct obstack weightpool;
2110   struct obstack extrapool;
2111   struct obstack indirectpool;
2112   struct section_list *sect;
2113   struct collidx_table tablewc;
2114   uint32_t elem_size;
2115   uint32_t *elem_table;
2116   int i;
2117   struct element_t *runp;
2118
2119   init_locale_data (&file, nelems);
2120   add_locale_uint32 (&file, nrules);
2121
2122   /* If we have no LC_COLLATE data, or the locale uses codepoint
          collation, only a reduced category is written: besides the rule
          count added above, the hash table size is emitted as zero and,
          if COLLATE is not NULL, the code set name and the `collseqmb'
          and `collseqwc' tables from C-collate-seq.c.  All other items
          are empty.  */
2123   if (collate == NULL || collate->codepoint_collation)
2124     {
2125       size_t idx;
            /* Item 0 (the rule count) has been added already.  */
2126       for (idx = 1; idx < nelems; idx++)
2127         {
2128           /* The words have to be handled specially.  */
2129           if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2130             add_locale_uint32 (&file, 0);
2131           else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
2132                    && collate != NULL)
2133             /* A valid LC_COLLATE must have a code set name.  */
2134             add_locale_string (&file, charmap->code_set_name);
2135           else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
2136                    && collate != NULL)
2137             add_locale_raw_data (&file, collseqmb, sizeof (collseqmb));
2138           else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
2139                    && collate != NULL)
2140             add_locale_uint32_array (&file, collseqwc,
2141                                      array_length (collseqwc));
2142           else
2143             add_locale_empty (&file);
2144         }
2145       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2146       return;
2147     }
2148
2149   obstack_init (&weightpool);
2150   obstack_init (&extrapool);
2151   obstack_init (&indirectpool);
2152
2153   /* Since we are using the sign of an integer to mark indirection the
2154      offsets in the arrays we are indirectly referring to must not be
2155      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2156   obstack_int32_grow (&extrapool, 0);
2157   obstack_int32_grow (&indirectpool, 0);
2158
2159   /* Prepare the ruleset table.  Every distinct rule set is written
          exactly once: collate_finish hands out `ruleidx' values in
          increasing order to the first section using a rule set and
          reuses that value for later sections with identical rules.
          Comparing against the running counter I therefore selects the
          first section of each rule set.  */
2160   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2161     if (sect->rules != NULL && sect->ruleidx == i)
2162       {
2163         int j;
2164
2165         obstack_make_room (&weightpool, nrules);
2166
2167         for (j = 0; j < nrules; ++j)
2168           obstack_1grow_fast (&weightpool, sect->rules[j]);
2169         ++i;
2170       }
2171   /* And align the output.  I is reused here: it becomes the number of
          bytes written modulo LOCFILE_ALIGN and is counted up to
          LOCFILE_ALIGN while padding.  */
2172   i = (nrules * i) % LOCFILE_ALIGN;
2173   if (i > 0)
2174     do
2175       obstack_1grow (&weightpool, '\0');
2176     while (++i < LOCFILE_ALIGN);
2177
        /* NOTE(review): the weight pool is reused for the weights below;
           this presumably relies on add_locale_raw_obstack finishing the
           object grown so far -- confirm in locfile.c.  */
2178   add_locale_raw_obstack (&file, &weightpool);
2179
2180   /* Generate the 8-bit table.  Walk through the lists of sequences
2181      starting with the same byte and add them one after the other to
2182      the table.  In case we have more than one sequence starting with
2183      the same byte we have to use extra indirection.
2184
2185      First add a record for the NUL byte.  This entry will never be used
2186      so it does not matter.  */
2187   tablemb[0] = 0;
2188
2189   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2190      will probably be used more than once it is good to store the
2191      weights only once.
          NOTE(review): as output_weight is written in this file it
          returns 0 right away for &collate->undefined, so this call does
          not append anything to the pool.  */
2192   if (collate->undefined.used_in_level != 0)
2193     output_weight (&weightpool, collate, &collate->undefined);
2194
2195   for (ch = 1; ch < 256; ++ch)
2196     if (collate->mbheads[ch]->mbnext == NULL
2197         && collate->mbheads[ch]->nmbs <= 1)
2198       {
             /* Exactly one sequence of at most one byte starts with CH:
                the table entry is the (non-negative) weight reference
                itself.  */
2199         tablemb[ch] = output_weight (&weightpool, collate,
2200                                      collate->mbheads[ch]);
2201       }
2202     else
2203       {
2204         /* The entries in the list are sorted by length and then
2205            alphabetically.  This is the order in which we will add the
2206            elements to the collation table.  This allows simply walking
2207            the table in sequence and stopping at the first matching
2208            entry.  Since the longer sequences are coming first in the
2209            list they have the possibility to match first, just as it
2210            has to be.  In the worst case we are walking to the end of
2211            the list where we put, if no singlebyte sequence is defined
2212            in the locale definition, the weights for UNDEFINED.
2213
2214            To reduce the length of the search list we compress them a bit.
2215            This happens by collecting sequences of consecutive byte
2216            sequences in one entry (having a begin and end byte sequence)
2217            and add only one index into the weight table.  We can find the
2218            consecutive entries since they are also consecutive in the list.  */
2219         struct element_t *runp = collate->mbheads[ch];
2220         struct element_t *lastp;
2221
2222         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2223
             /* A negative table entry marks indirection; its magnitude is
                the byte offset of the list in the extra pool.  */
2224         tablemb[ch] = -obstack_object_size (&extrapool);
2225
2226         do
2227           {
2228             /* Store the current index in the weight table.  We know that
2229                the current position in the `extrapool' is aligned on a
2230                32-bit address.  */
2231             int32_t weightidx;
2232             int added;
2233
2234             /* Find out whether this is a single entry or we have more than
2235                one consecutive entry.  The list is sorted in descending
                    order, hence the last byte of this entry has to be the
                    last byte of the next entry plus one.  */
2236             if (runp->mbnext != NULL
2237                 && runp->nmbs == runp->mbnext->nmbs
2238                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2239                 && (runp->mbs[runp->nmbs - 1]
2240                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2241               {
2242                 int i;
2243                 struct element_t *series_startp = runp;
2244                 struct element_t *curp;
2245
2246                 /* Compute how much space we will need: the 32-bit index,
                        one length byte and two byte sequences without their
                        first byte.  The room is reserved up front because
                        the *_fast functions used below do not check for
                        space.  */
2247                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2248                                           + 2 * (runp->nmbs - 1));
2249                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2250                 obstack_make_room (&extrapool, added);
2251
2252                 /* More than one consecutive entry.  We mark this by having
2253                    a negative index into the indirect table.  */
2254                 obstack_int32_grow_fast (&extrapool,
2255                                          -(obstack_object_size (&indirectpool)
2256                                            / sizeof (int32_t)));
2257
2258                 /* Now search first the end of the series.  */
2259                 do
2260                   runp = runp->mbnext;
2261                 while (runp->mbnext != NULL
2262                        && runp->nmbs == runp->mbnext->nmbs
2263                        && memcmp (runp->mbs, runp->mbnext->mbs,
2264                                   runp->nmbs - 1) == 0
2265                        && (runp->mbs[runp->nmbs - 1]
2266                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2267
2268                 /* Now walk backward from here to the beginning.  */
2269                 curp = runp;
2270
                     /* The length is stored in a single byte.  */
2271                 assert (runp->nmbs <= 256);
2272                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2273                 for (i = 1; i < curp->nmbs; ++i)
2274                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2275
2276                 /* Now find the end of the consecutive sequence and
2277                    add all the indices in the indirect pool.  */
2278                 do
2279                   {
2280                     weightidx = output_weight (&weightpool, collate, curp);
2281                     obstack_int32_grow (&indirectpool, weightidx);
2282
2283                     curp = curp->mblast;
2284                   }
2285                 while (curp != series_startp);
2286
2287                 /* Add the final weight.  */
2288                 weightidx = output_weight (&weightpool, collate, curp);
2289                 obstack_int32_grow (&indirectpool, weightidx);
2290
2291                 /* And add the end byte sequence.  Without length this
2292                    time.  */
2293                 for (i = 1; i < curp->nmbs; ++i)
2294                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2295               }
2296             else
2297               {
2298                 /* A single entry.  Simply add the index and the length and
2299                    string (except for the first character which is already
2300                    tested for).  */
2301                 int i;
2302
2303                 /* Output the weight info.  */
2304                 weightidx = output_weight (&weightpool, collate, runp);
2305
2306                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2307                                           + runp->nmbs - 1);
2308                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2309                 obstack_make_room (&extrapool, added);
2310
2311                 obstack_int32_grow_fast (&extrapool, weightidx);
2312                 assert (runp->nmbs <= 256);
2313                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2314
2315                 for (i = 1; i < runp->nmbs; ++i)
2316                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2317               }
2318
2319             /* Add alignment bytes if necessary.  The room for them was
                    included in ADDED above.  */
2320             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2321               obstack_1grow_fast (&extrapool, '\0');
2322
2323             /* Next entry.  */
2324             lastp = runp;
2325             runp = runp->mbnext;
2326           }
2327         while (runp != NULL);
2328
2329         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2330
2331         /* If the final entry in the list is not a single character we
2332            add an UNDEFINED entry here.  */
2333         if (lastp->nmbs != 1)
2334           {
2335             int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2336             obstack_make_room (&extrapool, added);
2337
                 /* Weight index zero.  */
2338             obstack_int32_grow_fast (&extrapool, 0);
2339             /* XXX What rule? We just pick the first.  */
2340             obstack_1grow_fast (&extrapool, 0);
2341             /* Length is zero.  */
2342             obstack_1grow_fast (&extrapool, 0);
2343
2344             /* Add alignment bytes if necessary.  */
2345             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2346               obstack_1grow_fast (&extrapool, '\0');
2347           }
2348       }
2349
2350   /* Add padding to the tables if necessary.  */
2351   while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2352     obstack_1grow (&weightpool, 0);
2353
2354   /* Now add the four tables.  */
2355   add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2356   add_locale_raw_obstack (&file, &weightpool);
2357   add_locale_raw_obstack (&file, &extrapool);
2358   add_locale_raw_obstack (&file, &indirectpool);
2359
2360   /* Now the same for the wide character table.  We need to store some
2361      more information here.  Three items are left empty first.  */
2362   add_locale_empty (&file);
2363   add_locale_empty (&file);
2364   add_locale_empty (&file);
2365
2366   /* Since we are using the sign of an integer to mark indirection the
2367      offsets in the arrays we are indirectly referring to must not be
2368      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2369   obstack_int32_grow (&extrapool, 0);
2370   obstack_int32_grow (&indirectpool, 0);
2371
2372   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2373      will probably be used more than once it is good to store the
2374      weights only once.
          NOTE(review): as output_weightwc is written in this file it
          returns 0 right away for &collate->undefined without appending
          anything, so the abort below cannot trigger.  */
2375   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2376     abort ();
2377
2378   /* Generate the table.  Walk through the lists of sequences starting
2379      with the same wide character and add them one after the other to
2380      the table.  In case we have more than one sequence starting with
2381      the same byte we have to use extra indirection.  */
2382   tablewc.p = 6;
2383   tablewc.q = 10;
2384   collidx_table_init (&tablewc);
2385
        /* add_to_tablewc only gets the character and the list head as
           arguments; everything else it needs is passed through the
           file-scope `atwc' and cleared again afterwards.  */
2386   atwc.weightpool = &weightpool;
2387   atwc.extrapool = &extrapool;
2388   atwc.indpool = &indirectpool;
2389   atwc.collate = collate;
2390   atwc.tablewc = &tablewc;
2391
2392   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2393
2394   memset (&atwc, 0, sizeof (atwc));
2395
2396   /* Now add the four tables.  */
2397   add_locale_collidx_table (&file, &tablewc);
2398   add_locale_raw_obstack (&file, &weightpool);
2399   add_locale_raw_obstack (&file, &extrapool);
2400   add_locale_raw_obstack (&file, &indirectpool);
2401
2402   /* Finally write the table with collation element names out.  It is
2403      a hash table with a simple function which gets the name of the
2404      character as the input.  One character might have many names.  The
2405      value associated with the name is an index into the weight table
2406      where we are then interested in the first-level weight value.
2407
2408      To determine how large the table should be we are counting the
2409      elements have to put in.  Since we are using internal chaining
2410      using a secondary hash function we have to make the table a bit
2411      larger to avoid extremely long search times.  The code below makes
2412      the table 50% larger than there are entries and then rounds up to
          the next prime number.  */
2413   elem_size = 0;
2414   runp = collate->start;
2415   while (runp != NULL)
2416     {
2417       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2418         /* Yep, the element really counts.  */
2419         ++elem_size;
2420
2421       runp = runp->next;
2422     }
2423   /* Add 50% and find the next prime number.  */
2424   elem_size = next_prime (elem_size + (elem_size >> 1));
2425
2426   /* Allocate the table.  Each entry consists of two words: the hash
2427      value and an index in a secondary table which provides the index
2428      into the weight table and the string itself (so that a match can
2429      be determined).  A hash word of zero marks an unused slot, hence
          the table is cleared first.  */
2430   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2431                                            elem_size * 2 * sizeof (uint32_t));
2432   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2433
2434   /* Now add the elements.  The same filter as in the counting loop
          above is used.  */
2435   runp = collate->start;
2436   while (runp != NULL)
2437     {
2438       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2439         {
2440           /* Compute the hash value of the name.  */
2441           uint32_t namelen = strlen (runp->name);
2442           uint32_t hash = elem_hash (runp->name, namelen);
2443           size_t idx = hash % elem_size;
               /* START_IDX is only needed by the assert in the probing
                  loop.  */
2444 #ifndef NDEBUG
2445           size_t start_idx = idx;
2446 #endif
2447
2448           if (elem_table[idx * 2] != 0)
2449             {
2450               /* The spot is already taken.  Try iterating using the value
2451                  from the secondary hashing function.  */
2452               size_t iter = hash % (elem_size - 2) + 1;
2453
2454               do
2455                 {
2456                   idx += iter;
2457                   if (idx >= elem_size)
2458                     idx -= elem_size;
2459                   assert (idx != start_idx);
2460                 }
2461               while (elem_table[idx * 2] != 0);
2462             }
2463           /* This is the spot where we will insert the value.  The second
                  word is the offset of the record added to the extra pool
                  below.  */
2464           elem_table[idx * 2] = hash;
2465           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2466
2467           /* The string itself including length.  The length is stored
                  in a single byte.  */
2468           obstack_1grow (&extrapool, namelen);
2469           obstack_grow (&extrapool, runp->name, namelen);
2470
2471           /* And the multibyte representation.  */
2472           obstack_1grow (&extrapool, runp->nmbs);
2473           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2474
2475           /* And align again to 32 bits.  At most three padding bytes are
                  needed; the string literal provides exactly three NUL
                  bytes when its terminator is counted.  */
2476           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2477             obstack_grow (&extrapool, "\0\0",
2478                           (sizeof (int32_t)
2479                            - ((1 + namelen + 1 + runp->nmbs)
2480                               % sizeof (int32_t))));
2481
2482           /* Now some 32-bit values: multibyte collation sequence,
2483              wide char string (including length), and wide char
2484              collation sequence.  */
2485           obstack_int32_grow (&extrapool, runp->mbseqorder);
2486
2487           obstack_int32_grow (&extrapool, runp->nwcs);
2488           obstack_grow (&extrapool, runp->wcs,
2489                         runp->nwcs * sizeof (uint32_t));
2490           maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2491
2492           obstack_int32_grow (&extrapool, runp->wcseqorder);
2493         }
2494
2495       runp = runp->next;
2496     }
2497
2498   /* Prepare to write out this data.  */
2499   add_locale_uint32 (&file, elem_size);
2500   add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2501   add_locale_raw_obstack (&file, &extrapool);
2502   add_locale_raw_data (&file, collate->mbseqorder, 256);
2503   add_locale_collseq_table (&file, &collate->wcseqorder);
2504   add_locale_string (&file, charmap->code_set_name);
2505   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2506
        /* Release everything allocated in the three pools.  */
2507   obstack_free (&weightpool, NULL);
2508   obstack_free (&extrapool, NULL);
2509   obstack_free (&indirectpool, NULL);
2510 }
2511
2512
2513 static enum token_t
2514 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2515          const struct charmap_t *charmap, int to_endif)
2516 {
2517   while (1)
2518     {
2519       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2520       enum token_t nowtok = now->tok;
2521
2522       if (nowtok == tok_eof || nowtok == tok_end)
2523         return nowtok;
2524
2525       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2526         {
2527           lr_error (ldfile, _("%s: nested conditionals not supported"),
2528                     "LC_COLLATE");
2529           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2530           if (nowtok == tok_eof || nowtok == tok_end)
2531             return nowtok;
2532         }
2533       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2534         {
2535           lr_ignore_rest (ldfile, 1);
2536           return nowtok;
2537         }
2538       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2539         {
2540           /* Do not read the rest of the line.  */
2541           return nowtok;
2542         }
2543       else if (nowtok == tok_else)
2544         {
2545           lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2546         }
2547
2548       lr_ignore_rest (ldfile, 0);
2549     }
2550 }
2551
2552
2553 void
2554 collate_read (struct linereader *ldfile, struct localedef_t *result,
2555               const struct charmap_t *charmap, const char *repertoire_name,
2556               int ignore_content)
2557 {
2558   struct repertoire_t *repertoire = NULL;
2559   struct locale_collate_t *collate;
2560   struct token *now;
2561   struct token *arg = NULL;
2562   enum token_t nowtok;
2563   enum token_t was_ellipsis = tok_none;
2564   struct localedef_t *copy_locale = NULL;
2565   /* Parsing state:
2566      0 - start
2567      1 - between `order-start' and `order-end'
2568      2 - after `order-end'
2569      3 - after `reorder-after', waiting for `reorder-end'
2570      4 - after `reorder-end'
2571      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572      6 - after `reorder-sections-end'
2573   */
2574   int state = 0;
2575
2576   /* Get the repertoire we have to use.  */
2577   if (repertoire_name != NULL)
2578     repertoire = repertoire_read (repertoire_name);
2579
2580   /* The rest of the line containing `LC_COLLATE' must be free.  */
2581   lr_ignore_rest (ldfile, 1);
2582
2583   while (1)
2584     {
2585       do
2586         {
2587           now = lr_token (ldfile, charmap, result, NULL, verbose);
2588           nowtok = now->tok;
2589         }
2590       while (nowtok == tok_eol);
2591
2592       if (nowtok != tok_define)
2593         break;
2594
2595       if (ignore_content)
2596         lr_ignore_rest (ldfile, 0);
2597       else
2598         {
2599           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2600           if (arg->tok != tok_ident)
2601             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2602           else
2603             {
2604               /* Simply add the new symbol.  */
2605               struct name_list *newsym = xmalloc (sizeof (*newsym)
2606                                                   + arg->val.str.lenmb + 1);
2607               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2608               newsym->str[arg->val.str.lenmb] = '\0';
2609               newsym->next = defined;
2610               defined = newsym;
2611
2612               lr_ignore_rest (ldfile, 1);
2613             }
2614         }
2615     }
2616
2617   if (nowtok == tok_copy)
2618     {
2619       now = lr_token (ldfile, charmap, result, NULL, verbose);
2620       if (now->tok != tok_string)
2621         {
2622           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2623
2624         skip_category:
2625           do
2626             now = lr_token (ldfile, charmap, result, NULL, verbose);
2627           while (now->tok != tok_eof && now->tok != tok_end);
2628
2629           if (now->tok != tok_eof
2630               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2631                   now->tok == tok_eof))
2632             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2633           else if (now->tok != tok_lc_collate)
2634             {
2635               lr_error (ldfile, _("\
2636 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637               lr_ignore_rest (ldfile, 0);
2638             }
2639           else
2640             lr_ignore_rest (ldfile, 1);
2641
2642           return;
2643         }
2644
2645       if (! ignore_content)
2646         {
2647           /* Get the locale definition.  */
2648           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2649                                      repertoire_name, charmap, NULL);
2650           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2651             {
2652               /* Not yet loaded.  So do it now.  */
2653               if (locfile_read (copy_locale, charmap) != 0)
2654                 goto skip_category;
2655             }
2656
2657           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2658             return;
2659         }
2660
2661       lr_ignore_rest (ldfile, 1);
2662
2663       now = lr_token (ldfile, charmap, result, NULL, verbose);
2664       nowtok = now->tok;
2665     }
2666
2667   /* Prepare the data structures.  */
2668   collate_startup (ldfile, result, copy_locale, ignore_content);
2669   collate = result->categories[LC_COLLATE].collate;
2670
2671   while (1)
2672     {
2673       char ucs4buf[10];
2674       char *symstr;
2675       size_t symlen;
2676
2677       /* Of course we don't proceed beyond the end of file.  */
2678       if (nowtok == tok_eof)
2679         break;
2680
2681       /* Ignore empty lines.  */
2682       if (nowtok == tok_eol)
2683         {
2684           now = lr_token (ldfile, charmap, result, NULL, verbose);
2685           nowtok = now->tok;
2686           continue;
2687         }
2688
2689       switch (nowtok)
2690         {
2691         case tok_codepoint_collation:
2692           collate->codepoint_collation = true;
2693           break;
2694
2695         case tok_copy:
2696           /* Allow copying other locales.  */
2697           now = lr_token (ldfile, charmap, result, NULL, verbose);
2698           if (now->tok != tok_string)
2699             goto err_label;
2700
2701           if (! ignore_content)
2702             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2703                          charmap, result);
2704
2705           lr_ignore_rest (ldfile, 1);
2706           break;
2707
2708         case tok_coll_weight_max:
2709           /* Ignore the rest of the line if we don't need the input of
2710              this line.  */
2711           if (ignore_content)
2712             {
2713               lr_ignore_rest (ldfile, 0);
2714               break;
2715             }
2716
2717           if (state != 0)
2718             goto err_label;
2719
2720           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2721           if (arg->tok != tok_number)
2722             goto err_label;
2723           if (collate->col_weight_max != -1)
2724             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2725                       "LC_COLLATE", "col_weight_max");
2726           else
2727             collate->col_weight_max = arg->val.num;
2728           lr_ignore_rest (ldfile, 1);
2729           break;
2730
2731         case tok_section_symbol:
2732           /* Ignore the rest of the line if we don't need the input of
2733              this line.  */
2734           if (ignore_content)
2735             {
2736               lr_ignore_rest (ldfile, 0);
2737               break;
2738             }
2739
2740           if (state != 0)
2741             goto err_label;
2742
2743           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2744           if (arg->tok != tok_bsymbol)
2745             goto err_label;
2746           else if (!ignore_content)
2747             {
2748               /* Check whether this section is already known.  */
2749               struct section_list *known = collate->sections;
2750               while (known != NULL)
2751                 {
2752                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2753                     break;
2754                   known = known->next;
2755                 }
2756
2757               if (known != NULL)
2758                 {
2759                   lr_error (ldfile,
2760                             _("%s: duplicate declaration of section `%s'"),
2761                             "LC_COLLATE", arg->val.str.startmb);
2762                   free (arg->val.str.startmb);
2763                 }
2764               else
2765                 collate->sections = make_seclist_elem (collate,
2766                                                        arg->val.str.startmb,
2767                                                        collate->sections);
2768
2769               lr_ignore_rest (ldfile, known == NULL);
2770             }
2771           else
2772             {
2773               free (arg->val.str.startmb);
2774               lr_ignore_rest (ldfile, 0);
2775             }
2776           break;
2777
2778         case tok_collating_element:
2779           /* Ignore the rest of the line if we don't need the input of
2780              this line.  */
2781           if (ignore_content)
2782             {
2783               lr_ignore_rest (ldfile, 0);
2784               break;
2785             }
2786
2787           if (state != 0 && state != 2)
2788             goto err_label;
2789
2790           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2791           if (arg->tok != tok_bsymbol)
2792             goto err_label;
2793           else
2794             {
2795               const char *symbol = arg->val.str.startmb;
2796               size_t symbol_len = arg->val.str.lenmb;
2797
2798               /* Next the `from' keyword.  */
2799               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2800               if (arg->tok != tok_from)
2801                 {
2802                   free ((char *) symbol);
2803                   goto err_label;
2804                 }
2805
2806               ldfile->return_widestr = 1;
2807               ldfile->translate_strings = 1;
2808
2809               /* Finally the string with the replacement.  */
2810               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2811
2812               ldfile->return_widestr = 0;
2813               ldfile->translate_strings = 0;
2814
2815               if (arg->tok != tok_string)
2816                 goto err_label;
2817
2818               if (!ignore_content && symbol != NULL)
2819                 {
2820                   /* The name is already defined.  */
2821                   if (check_duplicate (ldfile, collate, charmap,
2822                                        repertoire, symbol, symbol_len))
2823                     goto col_elem_free;
2824
2825                   if (arg->val.str.startmb != NULL)
2826                     insert_entry (&collate->elem_table, symbol, symbol_len,
2827                                   new_element (collate,
2828                                                arg->val.str.startmb,
2829                                                arg->val.str.lenmb - 1,
2830                                                arg->val.str.startwc,
2831                                                symbol, symbol_len, 0));
2832                 }
2833               else
2834                 {
2835                 col_elem_free:
2836                   free ((char *) symbol);
2837                   free (arg->val.str.startmb);
2838                   free (arg->val.str.startwc);
2839                 }
2840               lr_ignore_rest (ldfile, 1);
2841             }
2842           break;
2843
2844         case tok_collating_symbol:
2845           /* Ignore the rest of the line if we don't need the input of
2846              this line.  */
2847           if (ignore_content)
2848             {
2849               lr_ignore_rest (ldfile, 0);
2850               break;
2851             }
2852
2853           if (state != 0 && state != 2)
2854             goto err_label;
2855
2856           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2857           if (arg->tok != tok_bsymbol)
2858             goto err_label;
2859           else
2860             {
2861               char *symbol = arg->val.str.startmb;
2862               size_t symbol_len = arg->val.str.lenmb;
2863               char *endsymbol = NULL;
2864               size_t endsymbol_len = 0;
2865               enum token_t ellipsis = tok_none;
2866
2867               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2868               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2869                 {
2870                   ellipsis = arg->tok;
2871
2872                   arg = lr_token (ldfile, charmap, result, repertoire,
2873                                   verbose);
2874                   if (arg->tok != tok_bsymbol)
2875                     {
2876                       free (symbol);
2877                       goto err_label;
2878                     }
2879
2880                   endsymbol = arg->val.str.startmb;
2881                   endsymbol_len = arg->val.str.lenmb;
2882
2883                   lr_ignore_rest (ldfile, 1);
2884                 }
2885               else if (arg->tok != tok_eol)
2886                 {
2887                   free (symbol);
2888                   goto err_label;
2889                 }
2890
2891               if (!ignore_content)
2892                 {
2893                   if (symbol == NULL
2894                       || (ellipsis != tok_none && endsymbol == NULL))
2895                     {
2896                       lr_error (ldfile, _("\
2897 %s: unknown character in collating symbol name"),
2898                                 "LC_COLLATE");
2899                       goto col_sym_free;
2900                     }
2901                   else if (ellipsis == tok_none)
2902                     {
2903                       /* A single symbol, no ellipsis.  */
2904                       if (check_duplicate (ldfile, collate, charmap,
2905                                            repertoire, symbol, symbol_len))
2906                         /* The name is already defined.  */
2907                         goto col_sym_free;
2908
2909                       insert_entry (&collate->sym_table, symbol, symbol_len,
2910                                     new_symbol (collate, symbol, symbol_len));
2911                     }
2912                   else if (symbol_len != endsymbol_len)
2913                     {
2914                     col_sym_inv_range:
2915                       lr_error (ldfile,
2916                                 _("invalid names for character range"));
2917                       goto col_sym_free;
2918                     }
2919                   else
2920                     {
2921                       /* Oh my, we have to handle an ellipsis.  First, as
2922                          usual, determine the common prefix and then
2923                          convert the rest into a range.  */
2924                       size_t prefixlen;
2925                       unsigned long int from;
2926                       unsigned long int to;
2927                       char *endp;
2928
2929                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2930                         if (symbol[prefixlen] != endsymbol[prefixlen])
2931                           break;
2932
2933                       /* Convert the rest into numbers.  */
2934                       symbol[symbol_len] = '\0';
2935                       from = strtoul (&symbol[prefixlen], &endp,
2936                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2937                       if (*endp != '\0')
2938                         goto col_sym_inv_range;
2939
2940                       endsymbol[symbol_len] = '\0';
2941                       to = strtoul (&endsymbol[prefixlen], &endp,
2942                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2943                       if (*endp != '\0')
2944                         goto col_sym_inv_range;
2945
2946                       if (from > to)
2947                         goto col_sym_inv_range;
2948
2949                       /* Now loop over all entries.  */
2950                       while (from <= to)
2951                         {
2952                           char *symbuf;
2953
2954                           symbuf = (char *) obstack_alloc (&collate->mempool,
2955                                                            symbol_len + 1);
2956
2957                           /* Create the name.  */
2958                           sprintf (symbuf,
2959                                    ellipsis == tok_ellipsis2
2960                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2961                                    (int) prefixlen, symbol,
2962                                    (int) (symbol_len - prefixlen), from);
2963
2964                           if (check_duplicate (ldfile, collate, charmap,
2965                                                repertoire, symbuf, symbol_len))
2966                             /* The name is already defined.  */
2967                             goto col_sym_free;
2968
2969                           insert_entry (&collate->sym_table, symbuf,
2970                                         symbol_len,
2971                                         new_symbol (collate, symbuf,
2972                                                     symbol_len));
2973
2974                           /* Increment the counter.  */
2975                           ++from;
2976                         }
2977
2978                       goto col_sym_free;
2979                     }
2980                 }
2981               else
2982                 {
2983                 col_sym_free:
2984                   free (symbol);
2985                   free (endsymbol);
2986                 }
2987             }
2988           break;
2989
2990         case tok_symbol_equivalence:
2991           /* Ignore the rest of the line if we don't need the input of
2992              this line.  */
2993           if (ignore_content)
2994             {
2995               lr_ignore_rest (ldfile, 0);
2996               break;
2997             }
2998
2999           if (state != 0)
3000             goto err_label;
3001
3002           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3003           if (arg->tok != tok_bsymbol)
3004             goto err_label;
3005           else
3006             {
3007               const char *newname = arg->val.str.startmb;
3008               size_t newname_len = arg->val.str.lenmb;
3009               const char *symname;
3010               size_t symname_len;
3011               void *symval;     /* Actually struct symbol_t*  */
3012
3013               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3014               if (arg->tok != tok_bsymbol)
3015                 {
3016                   free ((char *) newname);
3017                   goto err_label;
3018                 }
3019
3020               symname = arg->val.str.startmb;
3021               symname_len = arg->val.str.lenmb;
3022
3023               if (newname == NULL)
3024                 {
3025                   lr_error (ldfile, _("\
3026 %s: unknown character in equivalent definition name"),
3027                             "LC_COLLATE");
3028
3029                 sym_equiv_free:
3030                   free ((char *) newname);
3031                   free ((char *) symname);
3032                   break;
3033                 }
3034               if (symname == NULL)
3035                 {
3036                   lr_error (ldfile, _("\
3037 %s: unknown character in equivalent definition value"),
3038                             "LC_COLLATE");
3039                   goto sym_equiv_free;
3040                 }
3041
3042               /* See whether the symbol name is already defined.  */
3043               if (find_entry (&collate->sym_table, symname, symname_len,
3044                               &symval) != 0)
3045                 {
3046                   lr_error (ldfile, _("\
3047 %s: unknown symbol `%s' in equivalent definition"),
3048                             "LC_COLLATE", symname);
3049                   goto sym_equiv_free;
3050                 }
3051
3052               if (insert_entry (&collate->sym_table,
3053                                 newname, newname_len, symval) < 0)
3054                 {
3055                   lr_error (ldfile, _("\
3056 error while adding equivalent collating symbol"));
3057                   goto sym_equiv_free;
3058                 }
3059
3060               free ((char *) symname);
3061             }
3062           lr_ignore_rest (ldfile, 1);
3063           break;
3064
3065         case tok_script:
3066           /* Ignore the rest of the line if we don't need the input of
3067              this line.  */
3068           if (ignore_content)
3069             {
3070               lr_ignore_rest (ldfile, 0);
3071               break;
3072             }
3073
3074           /* We get told about the scripts we know.  */
3075           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3076           if (arg->tok != tok_bsymbol)
3077             goto err_label;
3078           else
3079             {
3080               struct section_list *runp = collate->known_sections;
3081               char *name;
3082
3083               while (runp != NULL)
3084                 if (strncmp (runp->name, arg->val.str.startmb,
3085                              arg->val.str.lenmb) == 0
3086                     && runp->name[arg->val.str.lenmb] == '\0')
3087                   break;
3088                 else
3089                   runp = runp->def_next;
3090
3091               if (runp != NULL)
3092                 {
3093                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3094                             runp->name);
3095                   lr_ignore_rest (ldfile, 0);
3096                   break;
3097                 }
3098
3099               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3100               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3101               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3102               name[arg->val.str.lenmb] = '\0';
3103               runp->name = name;
3104
3105               runp->def_next = collate->known_sections;
3106               collate->known_sections = runp;
3107             }
3108           lr_ignore_rest (ldfile, 1);
3109           break;
3110
3111         case tok_order_start:
3112           /* Ignore the rest of the line if we don't need the input of
3113              this line.  */
3114           if (ignore_content)
3115             {
3116               lr_ignore_rest (ldfile, 0);
3117               break;
3118             }
3119
3120           if (state != 0 && state != 1 && state != 2)
3121             goto err_label;
3122           state = 1;
3123
3124           /* The 14652 draft does not specify whether all `order_start' lines
3125              must contain the same number of sort-rules, but 14651 does.  So
3126              we require this here as well.  */
3127           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3128           if (arg->tok == tok_bsymbol)
3129             {
3130               /* This better should be a section name.  */
3131               struct section_list *sp = collate->known_sections;
3132               while (sp != NULL
3133                      && (sp->name == NULL
3134                          || strncmp (sp->name, arg->val.str.startmb,
3135                                      arg->val.str.lenmb) != 0
3136                          || sp->name[arg->val.str.lenmb] != '\0'))
3137                 sp = sp->def_next;
3138
3139               if (sp == NULL)
3140                 {
3141                   lr_error (ldfile, _("\
3142 %s: unknown section name `%.*s'"),
3143                             "LC_COLLATE", (int) arg->val.str.lenmb,
3144                             arg->val.str.startmb);
3145                   /* We use the error section.  */
3146                   collate->current_section = &collate->error_section;
3147
3148                   if (collate->error_section.first == NULL)
3149                     {
3150                       /* Insert &collate->error_section at the end of
3151                          the collate->sections list.  */
3152                       if (collate->sections == NULL)
3153                         collate->sections = &collate->error_section;
3154                       else
3155                         {
3156                           sp = collate->sections;
3157                           while (sp->next != NULL)
3158                             sp = sp->next;
3159
3160                           sp->next = &collate->error_section;
3161                         }
3162                       collate->error_section.next = NULL;
3163                     }
3164                 }
3165               else
3166                 {
3167                   /* One should not be allowed to open the same
3168                      section twice.  */
3169                   if (sp->first != NULL)
3170                     lr_error (ldfile, _("\
3171 %s: multiple order definitions for section `%s'"),
3172                               "LC_COLLATE", sp->name);
3173                   else
3174                     {
3175                       /* Insert sp in the collate->sections list,
3176                          right after collate->current_section.  */
3177                       if (collate->current_section != NULL)
3178                         {
3179                           sp->next = collate->current_section->next;
3180                           collate->current_section->next = sp;
3181                         }
3182                       else if (collate->sections == NULL)
3183                         /* This is the first section to be defined.  */
3184                         collate->sections = sp;
3185
3186                       collate->current_section = sp;
3187                     }
3188
3189                   /* Next should come the end of the line or a semicolon.  */
3190                   arg = lr_token (ldfile, charmap, result, repertoire,
3191                                   verbose);
3192                   if (arg->tok == tok_eol)
3193                     {
3194                       uint32_t cnt;
3195
3196                       /* This means we have exactly one rule: `forward'.  */
3197                       if (nrules > 1)
3198                         lr_error (ldfile, _("\
3199 %s: invalid number of sorting rules"),
3200                                   "LC_COLLATE");
3201                       else
3202                         nrules = 1;
3203                       sp->rules = obstack_alloc (&collate->mempool,
3204                                                  (sizeof (enum coll_sort_rule)
3205                                                   * nrules));
3206                       for (cnt = 0; cnt < nrules; ++cnt)
3207                         sp->rules[cnt] = sort_forward;
3208
3209                       /* Next line.  */
3210                       break;
3211                     }
3212
3213                   /* Get the next token.  */
3214                   arg = lr_token (ldfile, charmap, result, repertoire,
3215                                   verbose);
3216                 }
3217             }
3218           else
3219             {
3220               /* There is no section symbol.  Therefore we use the unnamed
3221                  section.  */
3222               collate->current_section = &collate->unnamed_section;
3223
3224               if (collate->unnamed_section_defined)
3225                 lr_error (ldfile, _("\
3226 %s: multiple order definitions for unnamed section"),
3227                           "LC_COLLATE");
3228               else
3229                 {
3230                   /* Insert &collate->unnamed_section at the beginning of
3231                      the collate->sections list.  */
3232                   collate->unnamed_section.next = collate->sections;
3233                   collate->sections = &collate->unnamed_section;
3234                   collate->unnamed_section_defined = true;
3235                 }
3236             }
3237
3238           /* Now read the direction names.  */
3239           read_directions (ldfile, arg, charmap, repertoire, result);
3240
3241           /* From now we need the strings untranslated.  */
3242           ldfile->translate_strings = 0;
3243           break;
3244
3245         case tok_order_end:
3246           /* Ignore the rest of the line if we don't need the input of
3247              this line.  */
3248           if (ignore_content)
3249             {
3250               lr_ignore_rest (ldfile, 0);
3251               break;
3252             }
3253
3254           if (state != 1)
3255             goto err_label;
3256
3257           /* Handle ellipsis at end of list.  */
3258           if (was_ellipsis != tok_none)
3259             {
3260               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3261                                repertoire, result);
3262               was_ellipsis = tok_none;
3263             }
3264
3265           state = 2;
3266           lr_ignore_rest (ldfile, 1);
3267           break;
3268
3269         case tok_reorder_after:
3270           /* Ignore the rest of the line if we don't need the input of
3271              this line.  */
3272           if (ignore_content)
3273             {
3274               lr_ignore_rest (ldfile, 0);
3275               break;
3276             }
3277
3278           if (state == 1)
3279             {
3280               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3281                         "LC_COLLATE");
3282               state = 2;
3283
3284               /* Handle ellipsis at end of list.  */
3285               if (was_ellipsis != tok_none)
3286                 {
3287                   handle_ellipsis (ldfile, arg->val.str.startmb,
3288                                    arg->val.str.lenmb, was_ellipsis, charmap,
3289                                    repertoire, result);
3290                   was_ellipsis = tok_none;
3291                 }
3292             }
3293           else if (state == 0 && copy_locale == NULL)
3294             goto err_label;
3295           else if (state != 0 && state != 2 && state != 3)
3296             goto err_label;
3297           state = 3;
3298
3299           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3300           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3301             {
3302               /* Find this symbol in the sequence table.  */
3303               char ucsbuf[10];
3304               char *startmb;
3305               size_t lenmb;
3306               struct element_t *insp;
3307               int no_error = 1;
3308               void *ptr;
3309
3310               if (arg->tok == tok_bsymbol)
3311                 {
3312                   startmb = arg->val.str.startmb;
3313                   lenmb = arg->val.str.lenmb;
3314                 }
3315               else
3316                 {
3317                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3318                   startmb = ucsbuf;
3319                   lenmb = 9;
3320                 }
3321
3322               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3323                 /* Yes, the symbol exists.  Simply point the cursor
3324                    to it.  */
3325                 collate->cursor = (struct element_t *) ptr;
3326               else
3327                 {
3328                   struct symbol_t *symbp;
3329                   void *ptr;
3330
3331                   if (find_entry (&collate->sym_table, startmb, lenmb,
3332                                   &ptr) == 0)
3333                     {
3334                       symbp = ptr;
3335
3336                       if (symbp->order->last != NULL
3337                           || symbp->order->next != NULL)
3338                         collate->cursor = symbp->order;
3339                       else
3340                         {
3341                           /* This is a collating symbol but its position
3342                              is not yet defined.  */
3343                           lr_error (ldfile, _("\
3344 %s: order for collating symbol %.*s not yet defined"),
3345                                     "LC_COLLATE", (int) lenmb, startmb);
3346                           collate->cursor = NULL;
3347                           no_error = 0;
3348                         }
3349                     }
3350                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3351                                        &ptr) == 0)
3352                     {
3353                       insp = (struct element_t *) ptr;
3354
3355                       if (insp->last != NULL || insp->next != NULL)
3356                         collate->cursor = insp;
3357                       else
3358                         {
3359                           /* This is a collating element but its position
3360                              is not yet defined.  */
3361                           lr_error (ldfile, _("\
3362 %s: order for collating element %.*s not yet defined"),
3363                                     "LC_COLLATE", (int) lenmb, startmb);
3364                           collate->cursor = NULL;
3365                           no_error = 0;
3366                         }
3367                     }
3368                   else
3369                     {
3370                       /* This is bad.  The symbol after which we have to
3371                          insert does not exist.  */
3372                       lr_error (ldfile, _("\
3373 %s: cannot reorder after %.*s: symbol not known"),
3374                                 "LC_COLLATE", (int) lenmb, startmb);
3375                       collate->cursor = NULL;
3376                       no_error = 0;
3377                     }
3378                 }
3379
3380               lr_ignore_rest (ldfile, no_error);
3381             }
3382           else
3383             /* This must not happen.  */
3384             goto err_label;
3385           break;
3386
3387         case tok_reorder_end:
3388           /* Ignore the rest of the line if we don't need the input of
3389              this line.  */
3390           if (ignore_content)
3391             break;
3392
3393           if (state != 3)
3394             goto err_label;
3395           state = 4;
3396           lr_ignore_rest (ldfile, 1);
3397           break;
3398
3399         case tok_reorder_sections_after:
3400           /* Ignore the rest of the line if we don't need the input of
3401              this line.  */
3402           if (ignore_content)
3403             {
3404               lr_ignore_rest (ldfile, 0);
3405               break;
3406             }
3407
3408           if (state == 1)
3409             {
3410               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3411                         "LC_COLLATE");
3412               state = 2;
3413
3414               /* Handle ellipsis at end of list.  */
3415               if (was_ellipsis != tok_none)
3416                 {
3417                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3418                                    repertoire, result);
3419                   was_ellipsis = tok_none;
3420                 }
3421             }
3422           else if (state == 3)
3423             {
3424               record_error (0, 0, _("\
3425 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3426               state = 4;
3427             }
3428           else if (state != 2 && state != 4)
3429             goto err_label;
3430           state = 5;
3431
3432           /* Get the name of the sections we are adding after.  */
3433           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3434           if (arg->tok == tok_bsymbol)
3435             {
3436               /* Now find a section with this name.  */
3437               struct section_list *runp = collate->sections;
3438
3439               while (runp != NULL)
3440                 {
3441                   if (runp->name != NULL
3442                       && strlen (runp->name) == arg->val.str.lenmb
3443                       && memcmp (runp->name, arg->val.str.startmb,
3444                                  arg->val.str.lenmb) == 0)
3445                     break;
3446
3447                   runp = runp->next;
3448                 }
3449
3450               if (runp != NULL)
3451                 collate->current_section = runp;
3452               else
3453                 {
3454                   /* This is bad.  The section after which we have to
3455                      reorder does not exist.  Therefore we cannot
3456                      process the whole rest of this reorder
3457                      specification.  */
3458                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3459                             "LC_COLLATE", (int) arg->val.str.lenmb,
3460                             arg->val.str.startmb);
3461
3462                   do
3463                     {
3464                       lr_ignore_rest (ldfile, 0);
3465
3466                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3467                     }
3468                   while (now->tok == tok_reorder_sections_after
3469                          || now->tok == tok_reorder_sections_end
3470                          || now->tok == tok_end);
3471
3472                   /* Process the token we just saw.  */
3473                   nowtok = now->tok;
3474                   continue;
3475                 }
3476             }
3477           else
3478             /* This must not happen.  */
3479             goto err_label;
3480           break;
3481
3482         case tok_reorder_sections_end:
3483           /* Ignore the rest of the line if we don't need the input of
3484              this line.  */
3485           if (ignore_content)
3486             break;
3487
3488           if (state != 5)
3489             goto err_label;
3490           state = 6;
3491           lr_ignore_rest (ldfile, 1);
3492           break;
3493
3494         case tok_bsymbol:
3495         case tok_ucs4:
3496           /* Ignore the rest of the line if we don't need the input of
3497              this line.  */
3498           if (ignore_content)
3499             {
3500               lr_ignore_rest (ldfile, 0);
3501               break;
3502             }
3503
3504           if (state != 0 && state != 1 && state != 3 && state != 5)
3505             goto err_label;
3506
3507           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3508             goto err_label;
3509
3510           if (nowtok == tok_ucs4)
3511             {
3512               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3513               symstr = ucs4buf;
3514               symlen = 9;
3515             }
3516           else if (arg != NULL)
3517             {
3518               symstr = arg->val.str.startmb;
3519               symlen = arg->val.str.lenmb;
3520             }
3521           else
3522             {
3523               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3524                         (int) ldfile->token.val.str.lenmb,
3525                         ldfile->token.val.str.startmb);
3526               break;
3527             }
3528
3529           struct element_t *seqp;
3530           if (state == 0)
3531             {
3532               /* We are outside an `order_start' region.  This means
3533                  we must only accept definitions of values for
3534                  collation symbols since these are purely abstract
3535                  values and don't need directions associated.  */
3536               void *ptr;
3537
3538               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3539                 {
3540                   seqp = ptr;
3541
3542                   /* It's already defined.  First check whether this
3543                      is really a collating symbol.  */
3544                   if (seqp->is_character)
3545                     goto err_label;
3546
3547                   goto move_entry;
3548                 }
3549               else
3550                 {
3551                   void *result;
3552
3553                   if (find_entry (&collate->sym_table, symstr, symlen,
3554                                   &result) != 0)
3555                     /* No collating symbol, it's an error.  */
3556                     goto err_label;
3557
3558                   /* Maybe this is the first time we define a symbol
3559                      value and it is before the first actual section.  */
3560                   if (collate->sections == NULL)
3561                     collate->sections = collate->current_section =
3562                       &collate->symbol_section;
3563                 }
3564
3565               if (was_ellipsis != tok_none)
3566                 {
3567                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3568                                    charmap, repertoire, result);
3569
3570                   /* Remember that we processed the ellipsis.  */
3571                   was_ellipsis = tok_none;
3572
3573                   /* And don't add the value a second time.  */
3574                   break;
3575                 }
3576             }
3577           else if (state == 3)
3578             {
3579               /* It is possible that we already have this collation sequence.
3580                  In this case we move the entry.  */
3581               void *sym;
3582               void *ptr;
3583
3584               /* If the symbol after which we have to insert was not found
3585                  ignore all entries.  */
3586               if (collate->cursor == NULL)
3587                 {
3588                   lr_ignore_rest (ldfile, 0);
3589                   break;
3590                 }
3591
3592               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3593                 {
3594                   seqp = (struct element_t *) ptr;
3595                   goto move_entry;
3596                 }
3597
3598               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3599                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3600                 goto move_entry;
3601
3602               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3603                   && (seqp = (struct element_t *) ptr,
3604                       seqp->last != NULL || seqp->next != NULL
3605                       || (collate->start != NULL && seqp == collate->start)))
3606                 {
3607                 move_entry:
3608                   /* Remove the entry from the old position.  */
3609                   if (seqp->last == NULL)
3610                     collate->start = seqp->next;
3611                   else
3612                     seqp->last->next = seqp->next;
3613                   if (seqp->next != NULL)
3614                     seqp->next->last = seqp->last;
3615
3616                   /* We also have to check whether this entry is the
3617                      first or last of a section.  */
3618                   if (seqp->section->first == seqp)
3619                     {
3620                       if (seqp->section->first == seqp->section->last)
3621                         /* This section has no content anymore.  */
3622                         seqp->section->first = seqp->section->last = NULL;
3623                       else
3624                         seqp->section->first = seqp->next;
3625                     }
3626                   else if (seqp->section->last == seqp)
3627                     seqp->section->last = seqp->last;
3628
3629                   /* Now insert it in the new place.  */
3630                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3631                                   tok_none);
3632                   break;
3633                 }
3634
3635               /* Otherwise we just add a new entry.  */
3636             }
3637           else if (state == 5)
3638             {
3639               /* We are reordering sections.  Find the named section.  */
3640               struct section_list *runp = collate->sections;
3641               struct section_list *prevp = NULL;
3642
3643               while (runp != NULL)
3644                 {
3645                   if (runp->name != NULL
3646                       && strlen (runp->name) == symlen
3647                       && memcmp (runp->name, symstr, symlen) == 0)
3648                     break;
3649
3650                   prevp = runp;
3651                   runp = runp->next;
3652                 }
3653
3654               if (runp == NULL)
3655                 {
3656                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3657                             "LC_COLLATE", (int) symlen, symstr);
3658                   lr_ignore_rest (ldfile, 0);
3659                 }
3660               else
3661                 {
3662                   if (runp != collate->current_section)
3663                     {
3664                       /* Remove the named section from the old place and
3665                          insert it in the new one.  */
3666                       prevp->next = runp->next;
3667
3668                       runp->next = collate->current_section->next;
3669                       collate->current_section->next = runp;
3670                       collate->current_section = runp;
3671                     }
3672
3673                   /* Process the rest of the line which might change
3674                      the collation rules.  */
3675                   arg = lr_token (ldfile, charmap, result, repertoire,
3676                                   verbose);
3677                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3678                     read_directions (ldfile, arg, charmap, repertoire,
3679                                      result);
3680                 }
3681               break;
3682             }
3683           else if (was_ellipsis != tok_none)
3684             {
3685               /* Using the information in the `ellipsis_weight'
3686                  element and this and the last value we have to handle
3687                  the ellipsis now.  */
3688               assert (state == 1);
3689
3690               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3691                                repertoire, result);
3692
3693               /* Remember that we processed the ellipsis.  */
3694               was_ellipsis = tok_none;
3695
3696               /* And don't add the value a second time.  */
3697               break;
3698             }
3699
3700           /* Now insert in the new place.  */
3701           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3702           break;
3703
3704         case tok_undefined:
3705           /* Ignore the rest of the line if we don't need the input of
3706              this line.  */
3707           if (ignore_content)
3708             {
3709               lr_ignore_rest (ldfile, 0);
3710               break;
3711             }
3712
3713           if (state != 1)
3714             goto err_label;
3715
3716           if (was_ellipsis != tok_none)
3717             {
3718               lr_error (ldfile,
3719                         _("%s: cannot have `%s' as end of ellipsis range"),
3720                         "LC_COLLATE", "UNDEFINED");
3721
3722               unlink_element (collate);
3723               was_ellipsis = tok_none;
3724             }
3725
3726           /* See whether UNDEFINED already appeared somewhere.  */
3727           if (collate->undefined.next != NULL
3728               || &collate->undefined == collate->cursor)
3729             {
3730               lr_error (ldfile,
3731                         _("%s: order for `%.*s' already defined at %s:%zu"),
3732                         "LC_COLLATE", 9, "UNDEFINED",
3733                         collate->undefined.file,
3734                         collate->undefined.line);
3735               lr_ignore_rest (ldfile, 0);
3736             }
3737           else
3738             /* Parse the weights.  */
3739              insert_weights (ldfile, &collate->undefined, charmap,
3740                              repertoire, result, tok_none);
3741           break;
3742
3743         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3744         case tok_ellipsis3: /* absolute ellipsis */
3745         case tok_ellipsis4: /* symbolic decimal ellipsis */
3746           /* This is the symbolic (decimal or hexadecimal) or absolute
3747              ellipsis.  */
3748           if (was_ellipsis != tok_none)
3749             goto err_label;
3750
3751           if (state != 0 && state != 1 && state != 3)
3752             goto err_label;
3753
3754           was_ellipsis = nowtok;
3755
3756           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3757                           repertoire, result, nowtok);
3758           break;
3759
3760         case tok_end:
3761         seen_end:
3762           /* Next we assume `LC_COLLATE'.  */
3763           if (!ignore_content)
3764             {
3765               if (state == 0
3766                   && copy_locale == NULL
3767                   && !collate->codepoint_collation)
3768                 /* We must either see a copy statement or have
3769                    ordering values, or codepoint_collation.  */
3770                 lr_error (ldfile,
3771                           _("%s: empty category description not allowed"),
3772                           "LC_COLLATE");
3773               else if (state == 1)
3774                 {
3775                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3776                             "LC_COLLATE");
3777
3778                   /* Handle ellipsis at end of list.  */
3779                   if (was_ellipsis != tok_none)
3780                     {
3781                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3782                                        repertoire, result);
3783                       was_ellipsis = tok_none;
3784                     }
3785                 }
3786               else if (state == 3)
3787                 record_error (0, 0, _("\
3788 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3789               else if (state == 5)
3790                 record_error (0, 0, _("\
3791 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3792             }
3793           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3794           if (arg->tok == tok_eof)
3795             break;
3796           if (arg->tok == tok_eol)
3797             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3798           else if (arg->tok != tok_lc_collate)
3799             lr_error (ldfile, _("\
3800 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3801           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3802           return;
3803
3804         case tok_define:
3805           if (ignore_content)
3806             {
3807               lr_ignore_rest (ldfile, 0);
3808               break;
3809             }
3810
3811           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3812           if (arg->tok != tok_ident)
3813             goto err_label;
3814
3815           /* Simply add the new symbol.  */
3816           struct name_list *newsym = xmalloc (sizeof (*newsym)
3817                                               + arg->val.str.lenmb + 1);
3818           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3819           newsym->str[arg->val.str.lenmb] = '\0';
3820           newsym->next = defined;
3821           defined = newsym;
3822
3823           lr_ignore_rest (ldfile, 1);
3824           break;
3825
3826         case tok_undef:
3827           if (ignore_content)
3828             {
3829               lr_ignore_rest (ldfile, 0);
3830               break;
3831             }
3832
3833           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3834           if (arg->tok != tok_ident)
3835             goto err_label;
3836
3837           /* Remove _all_ occurrences of the symbol from the list.  */
3838           struct name_list *prevdef = NULL;
3839           struct name_list *curdef = defined;
3840           while (curdef != NULL)
3841             if (strncmp (arg->val.str.startmb, curdef->str,
3842                          arg->val.str.lenmb) == 0
3843                 && curdef->str[arg->val.str.lenmb] == '\0')
3844               {
3845                 if (prevdef == NULL)
3846                   defined = curdef->next;
3847                 else
3848                   prevdef->next = curdef->next;
3849
3850                 struct name_list *olddef = curdef;
3851                 curdef = curdef->next;
3852
3853                 free (olddef);
3854               }
3855             else
3856               {
3857                 prevdef = curdef;
3858                 curdef = curdef->next;
3859               }
3860
3861           lr_ignore_rest (ldfile, 1);
3862           break;
3863
3864         case tok_ifdef:
3865         case tok_ifndef:
3866           if (ignore_content)
3867             {
3868               lr_ignore_rest (ldfile, 0);
3869               break;
3870             }
3871
3872         found_ifdef:
3873           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3874           if (arg->tok != tok_ident)
3875             goto err_label;
3876           lr_ignore_rest (ldfile, 1);
3877
3878           if (collate->else_action == else_none)
3879             {
3880               curdef = defined;
3881               while (curdef != NULL)
3882                 if (strncmp (arg->val.str.startmb, curdef->str,
3883                              arg->val.str.lenmb) == 0
3884                     && curdef->str[arg->val.str.lenmb] == '\0')
3885                   break;
3886                 else
3887                   curdef = curdef->next;
3888
3889               if ((nowtok == tok_ifdef && curdef != NULL)
3890                   || (nowtok == tok_ifndef && curdef == NULL))
3891                 {
3892                   /* We have to use the if-branch.  */
3893                   collate->else_action = else_ignore;
3894                 }
3895               else
3896                 {
3897                   /* We have to use the else-branch, if there is one.  */
3898                   nowtok = skip_to (ldfile, collate, charmap, 0);
3899                   if (nowtok == tok_else)
3900                     collate->else_action = else_seen;
3901                   else if (nowtok == tok_elifdef)
3902                     {
3903                       nowtok = tok_ifdef;
3904                       goto found_ifdef;
3905                     }
3906                   else if (nowtok == tok_elifndef)
3907                     {
3908                       nowtok = tok_ifndef;
3909                       goto found_ifdef;
3910                     }
3911                   else if (nowtok == tok_eof)
3912                     goto seen_eof;
3913                   else if (nowtok == tok_end)
3914                     goto seen_end;
3915                 }
3916             }
3917           else
3918             {
3919               /* XXX Should it really become necessary to support nested
3920                  preprocessor handling we will push the state here.  */
3921               lr_error (ldfile, _("%s: nested conditionals not supported"),
3922                         "LC_COLLATE");
3923               nowtok = skip_to (ldfile, collate, charmap, 1);
3924               if (nowtok == tok_eof)
3925                 goto seen_eof;
3926               else if (nowtok == tok_end)
3927                 goto seen_end;
3928             }
3929           break;
3930
3931         case tok_elifdef:
3932         case tok_elifndef:
3933         case tok_else:
3934           if (ignore_content)
3935             {
3936               lr_ignore_rest (ldfile, 0);
3937               break;
3938             }
3939
3940           lr_ignore_rest (ldfile, 1);
3941
3942           if (collate->else_action == else_ignore)
3943             {
3944               /* Ignore everything until the endif.  */
3945               nowtok = skip_to (ldfile, collate, charmap, 1);
3946               if (nowtok == tok_eof)
3947                 goto seen_eof;
3948               else if (nowtok == tok_end)
3949                 goto seen_end;
3950             }
3951           else
3952             {
3953               assert (collate->else_action == else_none);
3954               lr_error (ldfile, _("\
3955 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3956                         nowtok == tok_else ? "else"
3957                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3958             }
3959           break;
3960
3961         case tok_endif:
3962           if (ignore_content)
3963             {
3964               lr_ignore_rest (ldfile, 0);
3965               break;
3966             }
3967
3968           lr_ignore_rest (ldfile, 1);
3969
3970           if (collate->else_action != else_ignore
3971               && collate->else_action != else_seen)
3972             lr_error (ldfile, _("\
3973 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3974
3975           /* XXX If we support nested preprocessor directives we pop
3976              the state here.  */
3977           collate->else_action = else_none;
3978           break;
3979
3980         default:
3981         err_label:
3982           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3983         }
3984
3985       /* Prepare for the next round.  */
3986       now = lr_token (ldfile, charmap, result, NULL, verbose);
3987       nowtok = now->tok;
3988     }
3989
3990  seen_eof:
3991   /* When we come here we reached the end of the file.  */
3992   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3993 }