locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2019 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <errno.h>
  23 #include <stdlib.h>
  24 #include <wchar.h>
  25 #include <stdint.h>
  26 #include <sys/param.h>
  27
  28 #include "localedef.h"
  29 #include "charmap.h"
  30 #include "localeinfo.h"
  31 #include "linereader.h"
  32 #include "locfile.h"
  33 #include "elem-hash.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* #define NDEBUG 1 */
  37 #include <assert.h>
  38
  39 #define obstack_chunk_alloc malloc
  40 #define obstack_chunk_free free
  41
  42 static inline void
  43 __attribute ((always_inline))
  44 obstack_int32_grow (struct obstack *obstack, int32_t data)
  45 {
  46   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  47   data = maybe_swap_uint32 (data);
  48   if (sizeof (int32_t) == sizeof (int))
  49     obstack_int_grow (obstack, data);
  50   else
  51     obstack_grow (obstack, &data, sizeof (int32_t));
  52 }
  53
  54 static inline void
  55 __attribute ((always_inline))
  56 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  57 {
  58   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  59   data = maybe_swap_uint32 (data);
  60   if (sizeof (int32_t) == sizeof (int))
  61     obstack_int_grow_fast (obstack, data);
  62   else
  63     obstack_grow (obstack, &data, sizeof (int32_t));
  64 }
  65
  66 /* Forward declaration.  */
  67 struct element_t;
  68
  69 /* Data type for list of strings.  */
  70 struct section_list
  71 {
  72   /* Successor in the known_sections list.  */
  73   struct section_list *def_next;
  74   /* Successor in the sections list.  */
  75   struct section_list *next;
  76   /* Name of the section.  */
  77   const char *name;
  78   /* First element of this section.  */
  79   struct element_t *first;
  80   /* Last element of this section.  */
  81   struct element_t *last;
  82   /* These are the rules for this section.  */
  83   enum coll_sort_rule *rules;
  84   /* Index of the rule set in the appropriate section of the output file.  */
  85   int ruleidx;
  86 };
  87
  88 struct element_t;
  89
  90 struct element_list_t
  91 {
  92   /* Number of elements.  */
  93   int cnt;
  94
  95   struct element_t **w;
  96 };
  97
  98 /* Data type for collating element.  */
  99 struct element_t
 100 {
 101   const char *name;
 102
 103   const char *mbs;
 104   size_t nmbs;
 105   const uint32_t *wcs;
 106   size_t nwcs;
 107   int *mborder;
 108   int wcorder;
 109
 110   /* The following is a bit mask which bits are set if this element is
 111      used in the appropriate level.  Interesting for the singlebyte
 112      weight computation.
 113
 114      XXX The type here restricts the number of levels to 32.  It could
 115      be changed if necessary but I doubt this is necessary.  */
 116   unsigned int used_in_level;
 117
 118   struct element_list_t *weights;
 119
 120   /* Nonzero if this is a real character definition.  */
 121   int is_character;
 122
 123   /* Order of the character in the sequence.  This information will
 124      be used in range expressions.  */
 125   int mbseqorder;
 126   int wcseqorder;
 127
 128   /* Where does the definition come from.  */
 129   const char *file;
 130   size_t line;
 131
 132   /* Which section does this belong to.  */
 133   struct section_list *section;
 134
 135   /* Predecessor and successor in the order list.  */
 136   struct element_t *last;
 137   struct element_t *next;
 138
 139   /* Next element in multibyte output list.  */
 140   struct element_t *mbnext;
 141   struct element_t *mblast;
 142
 143   /* Next element in wide character output list.  */
 144   struct element_t *wcnext;
 145   struct element_t *wclast;
 146 };
 147
/* Special element values.  These are small integers cast to element
   pointers; they act as placeholders and must never be dereferenced.
   insert_weights stores ELEMENT_ELLIPSIS2 as the weight of entries that
   are defined through an ellipsis.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 152
 153 /* Data type for collating symbol.  */
 154 struct symbol_t
 155 {
 156   const char *name;
 157
 158   /* Point to place in the order list.  */
 159   struct element_t *order;
 160
 161   /* Where does the definition come from.  */
 162   const char *file;
 163   size_t line;
 164 };
 165
/* Sparse table of struct element_t *.  The table type is produced by
   defining the parameter macros and then including "3level.h"; it is
   used below as `struct wchead_table'.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.
   NOTE(review): TABLE, ELEMENT and DEFAULT are defined again here without
   a visible #undef, so "3level.h" presumably undefines its parameter
   macros itself -- confirm against that header.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t; used below as `struct collseq_table'.  The
   default value is all bits set.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 185
 186
 187 /* Simple name list for the preprocessor.  */
 188 struct name_list
 189 {
 190   struct name_list *next;
 191   char str[0];
 192 };
 193
 194
 195 /* The real definition of the struct for the LC_COLLATE locale.  */
 196 struct locale_collate_t
 197 {
 198   int col_weight_max;
 199   int cur_weight_max;
 200
 201   /* List of known scripts.  */
 202   struct section_list *known_sections;
 203   /* List of used sections.  */
 204   struct section_list *sections;
 205   /* Current section using definition.  */
 206   struct section_list *current_section;
 207   /* There always can be an unnamed section.  */
 208   struct section_list unnamed_section;
 209   /* Flag whether the unnamed section has been defined.  */
 210   bool unnamed_section_defined;
 211   /* To make handling of errors easier we have another section.  */
 212   struct section_list error_section;
 213   /* Sometimes we are defining the values for collating symbols before
 214      the first actual section.  */
 215   struct section_list symbol_section;
 216
 217   /* Start of the order list.  */
 218   struct element_t *start;
 219
 220   /* The undefined element.  */
 221   struct element_t undefined;
 222
 223   /* This is the cursor for `reorder_after' insertions.  */
 224   struct element_t *cursor;
 225
 226   /* This value is used when handling ellipsis.  */
 227   struct element_t ellipsis_weight;
 228
 229   /* Known collating elements.  */
 230   hash_table elem_table;
 231
 232   /* Known collating symbols.  */
 233   hash_table sym_table;
 234
 235   /* Known collation sequences.  */
 236   hash_table seq_table;
 237
 238   struct obstack mempool;
 239
 240   /* The LC_COLLATE category is a bit special as it is sometimes possible
 241      that the definitions from more than one input file contains information.
 242      Therefore we keep all relevant input in a list.  */
 243   struct locale_collate_t *next;
 244
 245   /* Arrays with heads of the list for each of the leading bytes in
 246      the multibyte sequences.  */
 247   struct element_t *mbheads[256];
 248
 249   /* Arrays with heads of the list for each of the leading bytes in
 250      the multibyte sequences.  */
 251   struct wchead_table wcheads;
 252
 253   /* The arrays with the collation sequence order.  */
 254   unsigned char mbseqorder[256];
 255   struct collseq_table wcseqorder;
 256
 257   /* State of the preprocessor.  */
 258   enum
 259     {
 260       else_none = 0,
 261       else_ignore,
 262       else_seen
 263     }
 264     else_action;
 265 };
 266
 267
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.

   NRULES is the number of sorting rules (weight levels).  It is zero
   until read_directions has parsed its first direction list and set it;
   later lists are checked against that count.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
 274
 275
 276 /* We need UTF-8 encoding of numbers.  */
 277 static inline int
 278 __attribute ((always_inline))
 279 utf8_encode (char *buf, int val)
 280 {
 281   int retval;
 282
 283   if (val < 0x80)
 284     {
 285       *buf++ = (char) val;
 286       retval = 1;
 287     }
 288   else
 289     {
 290       int step;
 291
 292       for (step = 2; step < 6; ++step)
 293         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 294           break;
 295       retval = step;
 296
 297       *buf = (unsigned char) (~0xff >> step);
 298       --step;
 299       do
 300         {
 301           buf[step] = 0x80 | (val & 0x3f);
 302           val >>= 6;
 303         }
 304       while (--step > 0);
 305       *buf |= val;
 306     }
 307
 308   return retval;
 309 }
 310
 311
 312 static struct section_list *
 313 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 314                    struct section_list *next)
 315 {
 316   struct section_list *newp;
 317
 318   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 319                                                 sizeof (*newp));
 320   newp->next = next;
 321   newp->name = string;
 322   newp->first = NULL;
 323   newp->last = NULL;
 324
 325   return newp;
 326 }
 327
 328
 329 static struct element_t *
 330 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 331              const uint32_t *wcs, const char *name, size_t namelen,
 332              int is_character)
 333 {
 334   struct element_t *newp;
 335
 336   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 337                                              sizeof (*newp));
 338   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 339                                                     name, namelen);
 340   if (mbs != NULL)
 341     {
 342       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 343       newp->nmbs = mbslen;
 344     }
 345   else
 346     {
 347       newp->mbs = NULL;
 348       newp->nmbs = 0;
 349     }
 350   if (wcs != NULL)
 351     {
 352       size_t nwcs = wcslen ((wchar_t *) wcs);
 353       uint32_t zero = 0;
 354       /* Handle <U0000> as a single character.  */
 355       if (nwcs == 0)
 356         nwcs = 1;
 357       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 358       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 359       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 360       newp->nwcs = nwcs;
 361     }
 362   else
 363     {
 364       newp->wcs = NULL;
 365       newp->nwcs = 0;
 366     }
 367   newp->mborder = NULL;
 368   newp->wcorder = 0;
 369   newp->used_in_level = 0;
 370   newp->is_character = is_character;
 371
 372   /* Will be assigned later.  XXX  */
 373   newp->mbseqorder = 0;
 374   newp->wcseqorder = 0;
 375
 376   /* Will be allocated later.  */
 377   newp->weights = NULL;
 378
 379   newp->file = NULL;
 380   newp->line = 0;
 381
 382   newp->section = collate->current_section;
 383
 384   newp->last = NULL;
 385   newp->next = NULL;
 386
 387   newp->mbnext = NULL;
 388   newp->mblast = NULL;
 389
 390   newp->wcnext = NULL;
 391   newp->wclast = NULL;
 392
 393   return newp;
 394 }
 395
 396
 397 static struct symbol_t *
 398 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 399 {
 400   struct symbol_t *newp;
 401
 402   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 403
 404   newp->name = obstack_copy0 (&collate->mempool, name, len);
 405   newp->order = NULL;
 406
 407   newp->file = NULL;
 408   newp->line = 0;
 409
 410   return newp;
 411 }
 412
 413
 414 /* Test whether this name is already defined somewhere.  */
 415 static int
 416 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 417                  const struct charmap_t *charmap,
 418                  struct repertoire_t *repertoire, const char *symbol,
 419                  size_t symbol_len)
 420 {
 421   void *ignore = NULL;
 422
 423   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 424     {
 425       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 426                 (int) symbol_len, symbol);
 427       return 1;
 428     }
 429
 430   if (repertoire != NULL
 431       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 432           == 0))
 433     {
 434       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 435                 (int) symbol_len, symbol);
 436       return 1;
 437     }
 438
 439   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 440     {
 441       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 442                 (int) symbol_len, symbol);
 443       return 1;
 444     }
 445
 446   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 447     {
 448       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 449                 (int) symbol_len, symbol);
 450       return 1;
 451     }
 452
 453   return 0;
 454 }
 455
 456
 457 /* Read the direction specification.  */
 458 static void
 459 read_directions (struct linereader *ldfile, struct token *arg,
 460                  const struct charmap_t *charmap,
 461                  struct repertoire_t *repertoire, struct localedef_t *result)
 462 {
 463   int cnt = 0;
 464   int max = nrules ?: 10;
 465   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 466   int warned = 0;
 467   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 468
 469   while (1)
 470     {
 471       int valid = 0;
 472
 473       if (arg->tok == tok_forward)
 474         {
 475           if (rules[cnt] & sort_backward)
 476             {
 477               if (! warned)
 478                 {
 479                   lr_error (ldfile, _("\
 480 %s: `forward' and `backward' are mutually excluding each other"),
 481                             "LC_COLLATE");
 482                   warned = 1;
 483                 }
 484             }
 485           else if (rules[cnt] & sort_forward)
 486             {
 487               if (! warned)
 488                 {
 489                   lr_error (ldfile, _("\
 490 %s: `%s' mentioned more than once in definition of weight %d"),
 491                             "LC_COLLATE", "forward", cnt + 1);
 492                 }
 493             }
 494           else
 495             rules[cnt] |= sort_forward;
 496
 497           valid = 1;
 498         }
 499       else if (arg->tok == tok_backward)
 500         {
 501           if (rules[cnt] & sort_forward)
 502             {
 503               if (! warned)
 504                 {
 505                   lr_error (ldfile, _("\
 506 %s: `forward' and `backward' are mutually excluding each other"),
 507                             "LC_COLLATE");
 508                   warned = 1;
 509                 }
 510             }
 511           else if (rules[cnt] & sort_backward)
 512             {
 513               if (! warned)
 514                 {
 515                   lr_error (ldfile, _("\
 516 %s: `%s' mentioned more than once in definition of weight %d"),
 517                             "LC_COLLATE", "backward", cnt + 1);
 518                 }
 519             }
 520           else
 521             rules[cnt] |= sort_backward;
 522
 523           valid = 1;
 524         }
 525       else if (arg->tok == tok_position)
 526         {
 527           if (rules[cnt] & sort_position)
 528             {
 529               if (! warned)
 530                 {
 531                   lr_error (ldfile, _("\
 532 %s: `%s' mentioned more than once in definition of weight %d"),
 533                             "LC_COLLATE", "position", cnt + 1);
 534                 }
 535             }
 536           else
 537             rules[cnt] |= sort_position;
 538
 539           valid = 1;
 540         }
 541
 542       if (valid)
 543         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 544
 545       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 546           || arg->tok == tok_semicolon)
 547         {
 548           if (! valid && ! warned)
 549             {
 550               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 551               warned = 1;
 552             }
 553
 554           /* See whether we have to increment the counter.  */
 555           if (arg->tok != tok_comma && rules[cnt] != 0)
 556             {
 557               /* Add the default `forward' if we have seen only `position'.  */
 558               if (rules[cnt] == sort_position)
 559                 rules[cnt] = sort_position | sort_forward;
 560
 561               ++cnt;
 562             }
 563
 564           if (arg->tok == tok_eof || arg->tok == tok_eol)
 565             /* End of line or file, so we exit the loop.  */
 566             break;
 567
 568           if (nrules == 0)
 569             {
 570               /* See whether we have enough room in the array.  */
 571               if (cnt == max)
 572                 {
 573                   max += 10;
 574                   rules = (enum coll_sort_rule *) xrealloc (rules,
 575                                                             max
 576                                                             * sizeof (*rules));
 577                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 578                 }
 579             }
 580           else
 581             {
 582               if (cnt == nrules)
 583                 {
 584                   /* There must not be any more rule.  */
 585                   if (! warned)
 586                     {
 587                       lr_error (ldfile, _("\
 588 %s: too many rules; first entry only had %d"),
 589                                 "LC_COLLATE", nrules);
 590                       warned = 1;
 591                     }
 592
 593                   lr_ignore_rest (ldfile, 0);
 594                   break;
 595                 }
 596             }
 597         }
 598       else
 599         {
 600           if (! warned)
 601             {
 602               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 603               warned = 1;
 604             }
 605         }
 606
 607       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 608     }
 609
 610   if (nrules == 0)
 611     {
 612       /* Now we know how many rules we have.  */
 613       nrules = cnt;
 614       rules = (enum coll_sort_rule *) xrealloc (rules,
 615                                                 nrules * sizeof (*rules));
 616     }
 617   else
 618     {
 619       if (cnt < nrules)
 620         {
 621           /* Not enough rules in this specification.  */
 622           if (! warned)
 623             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 624
 625           do
 626             rules[cnt] = sort_forward;
 627           while (++cnt < nrules);
 628         }
 629     }
 630
 631   collate->current_section->rules = rules;
 632 }
 633
 634
 635 static struct element_t *
 636 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 637               const char *str, size_t len)
 638 {
 639   void *result = NULL;
 640
 641   /* Search for the entries among the collation sequences already define.  */
 642   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 643     {
 644       /* Nope, not define yet.  So we see whether it is a
 645          collation symbol.  */
 646       void *ptr;
 647
 648       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 649         {
 650           /* It's a collation symbol.  */
 651           struct symbol_t *sym = (struct symbol_t *) ptr;
 652           result = sym->order;
 653
 654           if (result == NULL)
 655             result = sym->order = new_element (collate, NULL, 0, NULL,
 656                                                NULL, 0, 0);
 657         }
 658       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 659         {
 660           /* It's also no collation element.  So it is a character
 661              element defined later.  */
 662           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 663           /* Insert it into the sequence table.  */
 664           insert_entry (&collate->seq_table, str, len, result);
 665         }
 666     }
 667
 668   return (struct element_t *) result;
 669 }
 670
 671
 672 static void
 673 unlink_element (struct locale_collate_t *collate)
 674 {
 675   if (collate->cursor == collate->start)
 676     {
 677       assert (collate->cursor->next == NULL);
 678       assert (collate->cursor->last == NULL);
 679       collate->cursor = NULL;
 680     }
 681   else
 682     {
 683       if (collate->cursor->next != NULL)
 684         collate->cursor->next->last = collate->cursor->last;
 685       if (collate->cursor->last != NULL)
 686         collate->cursor->last->next = collate->cursor->next;
 687       collate->cursor = collate->cursor->last;
 688     }
 689 }
 690
 691
 692 static void
 693 insert_weights (struct linereader *ldfile, struct element_t *elem,
 694                 const struct charmap_t *charmap,
 695                 struct repertoire_t *repertoire, struct localedef_t *result,
 696                 enum token_t ellipsis)
 697 {
 698   int weight_cnt;
 699   struct token *arg;
 700   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 701
 702   /* Initialize all the fields.  */
 703   elem->file = ldfile->fname;
 704   elem->line = ldfile->lineno;
 705
 706   elem->last = collate->cursor;
 707   elem->next = collate->cursor ? collate->cursor->next : NULL;
 708   if (collate->cursor != NULL && collate->cursor->next != NULL)
 709     collate->cursor->next->last = elem;
 710   if (collate->cursor != NULL)
 711     collate->cursor->next = elem;
 712   if (collate->start == NULL)
 713     {
 714       assert (collate->cursor == NULL);
 715       collate->start = elem;
 716     }
 717
 718   elem->section = collate->current_section;
 719
 720   if (collate->current_section->first == NULL)
 721     collate->current_section->first = elem;
 722   if (collate->current_section->last == collate->cursor)
 723     collate->current_section->last = elem;
 724
 725   collate->cursor = elem;
 726
 727   elem->weights = (struct element_list_t *)
 728     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 729   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 730
 731   weight_cnt = 0;
 732
 733   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 734   do
 735     {
 736       if (arg->tok == tok_eof || arg->tok == tok_eol)
 737         break;
 738
 739       if (arg->tok == tok_ignore)
 740         {
 741           /* The weight for this level has to be ignored.  We use the
 742              null pointer to indicate this.  */
 743           elem->weights[weight_cnt].w = (struct element_t **)
 744             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 745           elem->weights[weight_cnt].w[0] = NULL;
 746           elem->weights[weight_cnt].cnt = 1;
 747         }
 748       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 749         {
 750           char ucs4str[10];
 751           struct element_t *val;
 752           char *symstr;
 753           size_t symlen;
 754
 755           if (arg->tok == tok_bsymbol)
 756             {
 757               symstr = arg->val.str.startmb;
 758               symlen = arg->val.str.lenmb;
 759             }
 760           else
 761             {
 762               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 763               symstr = ucs4str;
 764               symlen = 9;
 765             }
 766
 767           val = find_element (ldfile, collate, symstr, symlen);
 768           if (val == NULL)
 769             break;
 770
 771           elem->weights[weight_cnt].w = (struct element_t **)
 772             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 773           elem->weights[weight_cnt].w[0] = val;
 774           elem->weights[weight_cnt].cnt = 1;
 775         }
 776       else if (arg->tok == tok_string)
 777         {
 778           /* Split the string up in the individual characters and put
 779              the element definitions in the list.  */
 780           const char *cp = arg->val.str.startmb;
 781           int cnt = 0;
 782           struct element_t *charelem;
 783           struct element_t **weights = NULL;
 784           int max = 0;
 785
 786           if (*cp == '\0')
 787             {
 788               lr_error (ldfile, _("%s: empty weight string not allowed"),
 789                         "LC_COLLATE");
 790               lr_ignore_rest (ldfile, 0);
 791               break;
 792             }
 793
 794           do
 795             {
 796               if (*cp == '<')
 797                 {
 798                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 799                      the latter we have to unify the name.  */
 800                   const char *startp = ++cp;
 801                   size_t len;
 802
 803                   while (*cp != '>')
 804                     {
 805                       if (*cp == ldfile->escape_char)
 806                         ++cp;
 807                       if (*cp == '\0')
 808                         /* It's a syntax error.  */
 809                         goto syntax;
 810
 811                       ++cp;
 812                     }
 813
 814                   if (cp - startp == 5 && startp[0] == 'U'
 815                       && isxdigit (startp[1]) && isxdigit (startp[2])
 816                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 817                     {
 818                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 819                       char *newstr;
 820
 821                       newstr = (char *) xmalloc (10);
 822                       snprintf (newstr, 10, "U%08X", ucs4);
 823                       startp = newstr;
 824
 825                       len = 9;
 826                     }
 827                   else
 828                     len = cp - startp;
 829
 830                   charelem = find_element (ldfile, collate, startp, len);
 831                   ++cp;
 832                 }
 833               else
 834                 {
 835                   /* People really shouldn't use characters directly in
 836                      the string.  Especially since it's not really clear
 837                      what this means.  We interpret all characters in the
 838                      string as if that would be bsymbols.  Otherwise we
 839                      would have to match back to bsymbols somehow and this
 840                      is normally not what people normally expect.  */
 841                   charelem = find_element (ldfile, collate, cp++, 1);
 842                 }
 843
 844               if (charelem == NULL)
 845                 {
 846                   /* We ignore the rest of the line.  */
 847                   lr_ignore_rest (ldfile, 0);
 848                   break;
 849                 }
 850
 851               /* Add the pointer.  */
 852               if (cnt >= max)
 853                 {
 854                   struct element_t **newp;
 855                   max += 10;
 856                   newp = (struct element_t **)
 857                     alloca (max * sizeof (struct element_t *));
 858                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 859                   weights = newp;
 860                 }
 861               weights[cnt++] = charelem;
 862             }
 863           while (*cp != '\0');
 864
 865           /* Now store the information.  */
 866           elem->weights[weight_cnt].w = (struct element_t **)
 867             obstack_alloc (&collate->mempool,
 868                            cnt * sizeof (struct element_t *));
 869           memcpy (elem->weights[weight_cnt].w, weights,
 870                   cnt * sizeof (struct element_t *));
 871           elem->weights[weight_cnt].cnt = cnt;
 872
 873           /* We don't need the string anymore.  */
 874           free (arg->val.str.startmb);
 875         }
 876       else if (ellipsis != tok_none
 877                && (arg->tok == tok_ellipsis2
 878                    || arg->tok == tok_ellipsis3
 879                    || arg->tok == tok_ellipsis4))
 880         {
 881           /* It must be the same ellipsis as used in the initial column.  */
 882           if (arg->tok != ellipsis)
 883             lr_error (ldfile, _("\
 884 %s: weights must use the same ellipsis symbol as the name"),
 885                       "LC_COLLATE");
 886
 887           /* The weight for this level will depend on the element
 888              iterating over the range.  Put a placeholder.  */
 889           elem->weights[weight_cnt].w = (struct element_t **)
 890             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 891           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 892           elem->weights[weight_cnt].cnt = 1;
 893         }
 894       else
 895         {
 896         syntax:
 897           /* It's a syntax error.  */
 898           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 899           lr_ignore_rest (ldfile, 0);
 900           break;
 901         }
 902
 903       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 904       /* This better should be the end of the line or a semicolon.  */
 905       if (arg->tok == tok_semicolon)
 906         /* OK, ignore this and read the next token.  */
 907         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 908       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 909         {
 910           /* It's a syntax error.  */
 911           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 912           lr_ignore_rest (ldfile, 0);
 913           break;
 914         }
 915     }
 916   while (++weight_cnt < nrules);
 917
 918   if (weight_cnt < nrules)
 919     {
 920       /* This means the rest of the line uses the current element as
 921          the weight.  */
 922       do
 923         {
 924           elem->weights[weight_cnt].w = (struct element_t **)
 925             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 926           if (ellipsis == tok_none)
 927             elem->weights[weight_cnt].w[0] = elem;
 928           else
 929             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 930           elem->weights[weight_cnt].cnt = 1;
 931         }
 932       while (++weight_cnt < nrules);
 933     }
 934   else
 935     {
 936       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 937         {
 938           /* Too many rule values.  */
 939           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 940           lr_ignore_rest (ldfile, 0);
 941         }
 942       else
 943         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 944     }
 945 }
 946
 947
 948 static int
 949 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 950               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 951               struct localedef_t *result)
 952 {
 953   /* First find out what kind of symbol this is.  */
 954   struct charseq *seq;
 955   uint32_t wc;
 956   struct element_t *elem = NULL;
 957   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 958
 959   /* Try to find the character in the charmap.  */
 960   seq = charmap_find_value (charmap, symstr, symlen);
 961
 962   /* Determine the wide character.  */
 963   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 964     {
 965       wc = repertoire_find_value (repertoire, symstr, symlen);
 966       if (seq != NULL)
 967         seq->ucs4 = wc;
 968     }
 969   else
 970     wc = seq->ucs4;
 971
 972   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 973     {
 974       /* It's no character, so look through the collation elements and
 975          symbol list.  */
 976       void *ptr = elem;
 977       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 978         {
 979           void *result;
 980           struct symbol_t *sym = NULL;
 981
 982           /* It's also collation element.  Therefore it's either a
 983              collating symbol or it's a character which is not
 984              supported by the character set.  In the later case we
 985              simply create a dummy entry.  */
 986           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 987             {
 988               /* It's a collation symbol.  */
 989               sym = (struct symbol_t *) result;
 990
 991               elem = sym->order;
 992             }
 993
 994           if (elem == NULL)
 995             {
 996               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 997
 998               if (sym != NULL)
 999                 sym->order = elem;
1000               else
1001                 /* Enter a fake element in the sequence table.  This
1002                    won't cause anything in the output since there is
1003                    no multibyte or wide character associated with
1004                    it.  */
1005                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1006             }
1007         }
1008       else
1009         /* Copy the result back.  */
1010         elem = ptr;
1011     }
1012   else
1013     {
1014       /* Otherwise the symbols stands for a character.  */
1015       void *ptr = elem;
1016       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1017         {
1018           uint32_t wcs[2] = { wc, 0 };
1019
1020           /* We have to allocate an entry.  */
1021           elem = new_element (collate,
1022                               seq != NULL ? (char *) seq->bytes : NULL,
1023                               seq != NULL ? seq->nbytes : 0,
1024                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1025                               symstr, symlen, 1);
1026
1027           /* And add it to the table.  */
1028           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1029             /* This cannot happen.  */
1030             assert (! "Internal error");
1031         }
1032       else
1033         {
1034           /* Copy the result back.  */
1035           elem = ptr;
1036
1037           /* Maybe the character was used before the definition.  In this case
1038              we have to insert the byte sequences now.  */
1039           if (elem->mbs == NULL && seq != NULL)
1040             {
1041               elem->mbs = obstack_copy0 (&collate->mempool,
1042                                          seq->bytes, seq->nbytes);
1043               elem->nmbs = seq->nbytes;
1044             }
1045
1046           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1047             {
1048               uint32_t wcs[2] = { wc, 0 };
1049
1050               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1051               elem->nwcs = 1;
1052             }
1053         }
1054     }
1055
1056   /* Test whether this element is not already in the list.  */
1057   if (elem->next != NULL || elem == collate->cursor)
1058     {
1059       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1060                 (int) symlen, symstr, elem->file, elem->line);
1061       lr_ignore_rest (ldfile, 0);
1062       return 1;
1063     }
1064
1065   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1066
1067   return 0;
1068 }
1069
1070
1071 static void
1072 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1073                  enum token_t ellipsis, const struct charmap_t *charmap,
1074                  struct repertoire_t *repertoire,
1075                  struct localedef_t *result)
1076 {
1077   struct element_t *startp;
1078   struct element_t *endp;
1079   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1080
1081   /* Unlink the entry added for the ellipsis.  */
1082   unlink_element (collate);
1083   startp = collate->cursor;
1084
1085   /* Process and add the end-entry.  */
1086   if (symstr != NULL
1087       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1088     /* Something went wrong with inserting the to-value.  This means
1089        we cannot process the ellipsis.  */
1090     return;
1091
1092   /* Reset the cursor.  */
1093   collate->cursor = startp;
1094
1095   /* Now we have to handle many different situations:
1096      - we have to distinguish between the three different ellipsis forms
1097      - the is the ellipsis at the beginning, in the middle, or at the end.
1098   */
1099   endp = collate->cursor->next;
1100   assert (symstr == NULL || endp != NULL);
1101
1102   /* XXX The following is probably very wrong since also collating symbols
1103      can appear in ranges.  But do we want/can refine the test for that?  */
1104 #if 0
1105   /* Both, the start and the end symbol, must stand for characters.  */
1106   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1107       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1108     {
1109       lr_error (ldfile, _("\
1110 %s: the start and the end symbol of a range must stand for characters"),
1111                 "LC_COLLATE");
1112       return;
1113     }
1114 #endif
1115
1116   if (ellipsis == tok_ellipsis3)
1117     {
1118       /* One requirement we make here: the length of the byte
1119          sequences for the first and end character must be the same.
1120          This is mainly to prevent unwanted effects and this is often
1121          not what is wanted.  */
1122       size_t len = (startp->mbs != NULL ? startp->nmbs
1123                     : (endp->mbs != NULL ? endp->nmbs : 0));
1124       char mbcnt[len + 1];
1125       char mbend[len + 1];
1126
1127       /* Well, this should be caught somewhere else already.  Just to
1128          make sure.  */
1129       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1130       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1131
1132       if (startp != NULL && endp != NULL
1133           && startp->mbs != NULL && endp->mbs != NULL
1134           && startp->nmbs != endp->nmbs)
1135         {
1136           lr_error (ldfile, _("\
1137 %s: byte sequences of first and last character must have the same length"),
1138                     "LC_COLLATE");
1139           return;
1140         }
1141
1142       /* Determine whether we have to generate multibyte sequences.  */
1143       if ((startp == NULL || startp->mbs != NULL)
1144           && (endp == NULL || endp->mbs != NULL))
1145         {
1146           int cnt;
1147           int ret;
1148
1149           /* Prepare the beginning byte sequence.  This is either from the
1150              beginning byte sequence or it is all nulls if it was an
1151              initial ellipsis.  */
1152           if (startp == NULL || startp->mbs == NULL)
1153             memset (mbcnt, '\0', len);
1154           else
1155             {
1156               memcpy (mbcnt, startp->mbs, len);
1157
1158               /* And increment it so that the value is the first one we will
1159                  try to insert.  */
1160               for (cnt = len - 1; cnt >= 0; --cnt)
1161                 if (++mbcnt[cnt] != '\0')
1162                   break;
1163             }
1164           mbcnt[len] = '\0';
1165
1166           /* And the end sequence.  */
1167           if (endp == NULL || endp->mbs == NULL)
1168             memset (mbend, '\0', len);
1169           else
1170             memcpy (mbend, endp->mbs, len);
1171           mbend[len] = '\0';
1172
1173           /* Test whether we have a correct range.  */
1174           ret = memcmp (mbcnt, mbend, len);
1175           if (ret >= 0)
1176             {
1177               if (ret > 0)
1178                 lr_error (ldfile, _("%s: byte sequence of first character of \
1179 range is not lower than that of the last character"), "LC_COLLATE");
1180               return;
1181             }
1182
1183           /* Generate the byte sequences data.  */
1184           while (1)
1185             {
1186               struct charseq *seq;
1187
1188               /* Quite a bit of work ahead.  We have to find the character
1189                  definition for the byte sequence and then determine the
1190                  wide character belonging to it.  */
1191               seq = charmap_find_symbol (charmap, mbcnt, len);
1192               if (seq != NULL)
1193                 {
1194                   struct element_t *elem;
1195                   size_t namelen;
1196
1197                   /* I don't think this can ever happen.  */
1198                   assert (seq->name != NULL);
1199                   namelen = strlen (seq->name);
1200
1201                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1202                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1203                                                        namelen);
1204
1205                   /* Now we are ready to insert the new value in the
1206                      sequence.  Find out whether the element is
1207                      already known.  */
1208                   void *ptr;
1209                   if (find_entry (&collate->seq_table, seq->name, namelen,
1210                                   &ptr) != 0)
1211                     {
1212                       uint32_t wcs[2] = { seq->ucs4, 0 };
1213
1214                       /* We have to allocate an entry.  */
1215                       elem = new_element (collate, mbcnt, len,
1216                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1217                                           ? NULL : wcs, seq->name,
1218                                           namelen, 1);
1219
1220                       /* And add it to the table.  */
1221                       if (insert_entry (&collate->seq_table, seq->name,
1222                                         namelen, elem) != 0)
1223                         /* This cannot happen.  */
1224                         assert (! "Internal error");
1225                     }
1226                   else
1227                     /* Copy the result.  */
1228                     elem = ptr;
1229
1230                   /* Test whether this element is not already in the list.  */
1231                   if (elem->next != NULL || (collate->cursor != NULL
1232                                              && elem->next == collate->cursor))
1233                     {
1234                       lr_error (ldfile, _("\
1235 order for `%.*s' already defined at %s:%Zu"),
1236                                 (int) namelen, seq->name,
1237                                 elem->file, elem->line);
1238                       goto increment;
1239                     }
1240
1241                   /* Enqueue the new element.  */
1242                   elem->last = collate->cursor;
1243                   if (collate->cursor == NULL)
1244                     elem->next = NULL;
1245                   else
1246                     {
1247                       elem->next = collate->cursor->next;
1248                       elem->last->next = elem;
1249                       if (elem->next != NULL)
1250                         elem->next->last = elem;
1251                     }
1252                   if (collate->start == NULL)
1253                     {
1254                       assert (collate->cursor == NULL);
1255                       collate->start = elem;
1256                     }
1257                   collate->cursor = elem;
1258
1259                  /* Add the weight value.  We take them from the
1260                     `ellipsis_weights' member of `collate'.  */
1261                   elem->weights = (struct element_list_t *)
1262                     obstack_alloc (&collate->mempool,
1263                                    nrules * sizeof (struct element_list_t));
1264                   for (cnt = 0; cnt < nrules; ++cnt)
1265                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1266                         && (collate->ellipsis_weight.weights[cnt].w[0]
1267                             == ELEMENT_ELLIPSIS2))
1268                       {
1269                         elem->weights[cnt].w = (struct element_t **)
1270                           obstack_alloc (&collate->mempool,
1271                                          sizeof (struct element_t *));
1272                         elem->weights[cnt].w[0] = elem;
1273                         elem->weights[cnt].cnt = 1;
1274                       }
1275                     else
1276                       {
1277                         /* Simply use the weight from `ellipsis_weight'.  */
1278                         elem->weights[cnt].w =
1279                           collate->ellipsis_weight.weights[cnt].w;
1280                         elem->weights[cnt].cnt =
1281                           collate->ellipsis_weight.weights[cnt].cnt;
1282                       }
1283                 }
1284
1285               /* Increment for the next round.  */
1286             increment:
1287               for (cnt = len - 1; cnt >= 0; --cnt)
1288                 if (++mbcnt[cnt] != '\0')
1289                   break;
1290
1291               /* Find out whether this was all.  */
1292               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1293                 /* Yep, that's all.  */
1294                 break;
1295             }
1296         }
1297     }
1298   else
1299     {
1300       /* For symbolic range we naturally must have a beginning and an
1301          end specified by the user.  */
1302       if (startp == NULL)
1303         lr_error (ldfile, _("\
1304 %s: symbolic range ellipsis must not directly follow `order_start'"),
1305                   "LC_COLLATE");
1306       else if (endp == NULL)
1307         lr_error (ldfile, _("\
1308 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1309                   "LC_COLLATE");
1310       else
1311         {
1312           /* Determine the range.  To do so we have to determine the
1313              common prefix of the both names and then the numeric
1314              values of both ends.  */
1315           size_t lenfrom = strlen (startp->name);
1316           size_t lento = strlen (endp->name);
1317           char buf[lento + 1];
1318           int preflen = 0;
1319           long int from;
1320           long int to;
1321           char *cp;
1322           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1323
1324           if (lenfrom != lento)
1325             {
1326             invalid_range:
1327               lr_error (ldfile, _("\
1328 `%s' and `%.*s' are not valid names for symbolic range"),
1329                         startp->name, (int) lento, endp->name);
1330               return;
1331             }
1332
1333           while (startp->name[preflen] == endp->name[preflen])
1334             if (startp->name[preflen] == '\0')
1335               /* Nothing to be done.  The start and end point are identical
1336                  and while inserting the end point we have already given
1337                  the user an error message.  */
1338               return;
1339             else
1340               ++preflen;
1341
1342           errno = 0;
1343           from = strtol (startp->name + preflen, &cp, base);
1344           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1345             goto invalid_range;
1346
1347           errno = 0;
1348           to = strtol (endp->name + preflen, &cp, base);
1349           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1350             goto invalid_range;
1351
1352           /* Copy the prefix.  */
1353           memcpy (buf, startp->name, preflen);
1354
1355           /* Loop over all values.  */
1356           for (++from; from < to; ++from)
1357             {
1358               struct element_t *elem = NULL;
1359               struct charseq *seq;
1360               uint32_t wc;
1361               int cnt;
1362
1363               /* Generate the name.  */
1364               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1365                        (int) (lenfrom - preflen), from);
1366
1367               /* Look whether this name is already defined.  */
1368               void *ptr;
1369               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1370                 {
1371                   /* Copy back the result.  */
1372                   elem = ptr;
1373
1374                   if (elem->next != NULL || (collate->cursor != NULL
1375                                              && elem->next == collate->cursor))
1376                     {
1377                       lr_error (ldfile, _("\
1378 %s: order for `%.*s' already defined at %s:%Zu"),
1379                                 "LC_COLLATE", (int) lenfrom, buf,
1380                                 elem->file, elem->line);
1381                       continue;
1382                     }
1383
1384                   if (elem->name == NULL)
1385                     {
1386                       lr_error (ldfile, _("%s: `%s' must be a character"),
1387                                 "LC_COLLATE", buf);
1388                       continue;
1389                     }
1390                 }
1391
1392               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1393                 {
1394                   /* Search for a character of this name.  */
1395                   seq = charmap_find_value (charmap, buf, lenfrom);
1396                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1397                     {
1398                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1399
1400                       if (seq != NULL)
1401                         seq->ucs4 = wc;
1402                     }
1403                   else
1404                     wc = seq->ucs4;
1405
1406                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1407                     /* We don't know anything about a character with this
1408                        name.  XXX Should we warn?  */
1409                     continue;
1410
1411                   if (elem == NULL)
1412                     {
1413                       uint32_t wcs[2] = { wc, 0 };
1414
1415                       /* We have to allocate an entry.  */
1416                       elem = new_element (collate,
1417                                           seq != NULL
1418                                           ? (char *) seq->bytes : NULL,
1419                                           seq != NULL ? seq->nbytes : 0,
1420                                           wc == ILLEGAL_CHAR_VALUE
1421                                           ? NULL : wcs, buf, lenfrom, 1);
1422                     }
1423                   else
1424                     {
1425                       /* Update the element.  */
1426                       if (seq != NULL)
1427                         {
1428                           elem->mbs = obstack_copy0 (&collate->mempool,
1429                                                      seq->bytes, seq->nbytes);
1430                           elem->nmbs = seq->nbytes;
1431                         }
1432
1433                       if (wc != ILLEGAL_CHAR_VALUE)
1434                         {
1435                           uint32_t zero = 0;
1436
1437                           obstack_grow (&collate->mempool,
1438                                         &wc, sizeof (uint32_t));
1439                           obstack_grow (&collate->mempool,
1440                                         &zero, sizeof (uint32_t));
1441                           elem->wcs = obstack_finish (&collate->mempool);
1442                           elem->nwcs = 1;
1443                         }
1444                     }
1445
1446                   elem->file = ldfile->fname;
1447                   elem->line = ldfile->lineno;
1448                   elem->section = collate->current_section;
1449                 }
1450
1451               /* Enqueue the new element.  */
1452               elem->last = collate->cursor;
1453               elem->next = collate->cursor->next;
1454               elem->last->next = elem;
1455               if (elem->next != NULL)
1456                 elem->next->last = elem;
1457               collate->cursor = elem;
1458
1459               /* Now add the weights.  They come from the `ellipsis_weights'
1460                  member of `collate'.  */
1461               elem->weights = (struct element_list_t *)
1462                 obstack_alloc (&collate->mempool,
1463                                nrules * sizeof (struct element_list_t));
1464               for (cnt = 0; cnt < nrules; ++cnt)
1465                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1466                     && (collate->ellipsis_weight.weights[cnt].w[0]
1467                         == ELEMENT_ELLIPSIS2))
1468                   {
1469                     elem->weights[cnt].w = (struct element_t **)
1470                       obstack_alloc (&collate->mempool,
1471                                      sizeof (struct element_t *));
1472                     elem->weights[cnt].w[0] = elem;
1473                     elem->weights[cnt].cnt = 1;
1474                   }
1475                 else
1476                   {
1477                     /* Simly use the weight from `ellipsis_weight'.  */
1478                     elem->weights[cnt].w =
1479                       collate->ellipsis_weight.weights[cnt].w;
1480                     elem->weights[cnt].cnt =
1481                       collate->ellipsis_weight.weights[cnt].cnt;
1482                   }
1483             }
1484         }
1485     }
1486 }
1487
1488
1489 static void
1490 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1491                  struct localedef_t *copy_locale, int ignore_content)
1492 {
1493   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1494     {
1495       struct locale_collate_t *collate;
1496
1497       if (copy_locale == NULL)
1498         {
1499           collate = locale->categories[LC_COLLATE].collate =
1500             (struct locale_collate_t *)
1501             xcalloc (1, sizeof (struct locale_collate_t));
1502
1503           /* Init the various data structures.  */
1504           init_hash (&collate->elem_table, 100);
1505           init_hash (&collate->sym_table, 100);
1506           init_hash (&collate->seq_table, 500);
1507           obstack_init (&collate->mempool);
1508
1509           collate->col_weight_max = -1;
1510         }
1511       else
1512         /* Reuse the copy_locale's data structures.  */
1513         collate = locale->categories[LC_COLLATE].collate =
1514           copy_locale->categories[LC_COLLATE].collate;
1515     }
1516
1517   ldfile->translate_strings = 0;
1518   ldfile->return_widestr = 0;
1519 }
1520
1521
1522 void
1523 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1524 {
1525   /* Now is the time when we can assign the individual collation
1526      values for all the symbols.  We have possibly different values
1527      for the wide- and the multibyte-character symbols.  This is done
1528      since it might make a difference in the encoding if there is in
1529      some cases no multibyte-character but there are wide-characters.
1530      (The other way around it is not important since theencoded
1531      collation value in the wide-character case is 32 bits wide and
1532      therefore requires no encoding).
1533
1534      The lowest collation value assigned is 2.  Zero is reserved for
1535      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1536      functions and 1 is used to separate the individual passes for the
1537      different rules.
1538
1539      We also have to construct is list with all the bytes/words which
1540      can come first in a sequence, followed by all the elements which
1541      also start with this byte/word.  The order is reverse which has
1542      among others the important effect that longer strings are located
1543      first in the list.  This is required for the output data since
1544      the algorithm used in `strcoll' etc depends on this.
1545
1546      The multibyte case is easy.  We simply sort into an array with
1547      256 elements.  */
1548   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1549   int mbact[nrules];
1550   int wcact;
1551   int mbseqact;
1552   int wcseqact;
1553   struct element_t *runp;
1554   int i;
1555   int need_undefined = 0;
1556   struct section_list *sect;
1557   int ruleidx;
1558   int nr_wide_elems = 0;
1559
1560   if (collate == NULL)
1561     {
1562       /* No data, no check. Issue a warning.  */
1563       record_warning (_("No definition for %s category found"),
1564                       "LC_COLLATE");
1565       return;
1566     }
1567
1568   /* If this assertion is hit change the type in `element_t'.  */
1569   assert (nrules <= sizeof (runp->used_in_level) * 8);
1570
1571   /* Make sure that the `position' rule is used either in all sections
1572      or in none.  */
1573   for (i = 0; i < nrules; ++i)
1574     for (sect = collate->sections; sect != NULL; sect = sect->next)
1575       if (sect != collate->current_section
1576           && sect->rules != NULL
1577           && ((sect->rules[i] & sort_position)
1578               != (collate->current_section->rules[i] & sort_position)))
1579         {
1580           record_error (0, 0, _("\
1581 %s: `position' must be used for a specific level in all sections or none"),
1582                         "LC_COLLATE");
1583           break;
1584         }
1585
1586   /* Find out which elements are used at which level.  At the same
1587      time we find out whether we have any undefined symbols.  */
1588   runp = collate->start;
1589   while (runp != NULL)
1590     {
1591       if (runp->mbs != NULL)
1592         {
1593           for (i = 0; i < nrules; ++i)
1594             {
1595               int j;
1596
1597               for (j = 0; j < runp->weights[i].cnt; ++j)
1598                 /* A NULL pointer as the weight means IGNORE.  */
1599                 if (runp->weights[i].w[j] != NULL)
1600                   {
1601                     if (runp->weights[i].w[j]->weights == NULL)
1602                       {
1603                         record_error_at_line (0, 0, runp->file, runp->line,
1604                                               _("symbol `%s' not defined"),
1605                                               runp->weights[i].w[j]->name);
1606
1607                         need_undefined = 1;
1608                         runp->weights[i].w[j] = &collate->undefined;
1609                       }
1610                     else
1611                       /* Set the bit for the level.  */
1612                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1613                   }
1614             }
1615         }
1616
1617       /* Up to the next entry.  */
1618       runp = runp->next;
1619     }
1620
1621   /* Walk through the list of defined sequences and assign weights.  Also
1622      create the data structure which will allow generating the single byte
1623      character based tables.
1624
1625      Since at each time only the weights for each of the rules are
1626      only compared to other weights for this rule it is possible to
1627      assign more compact weight values than simply counting all
1628      weights in sequence.  We can assign weights from 3, one for each
1629      rule individually and only for those elements, which are actually
1630      used for this rule.
1631
1632      Why is this important?  It is not for the wide char table.  But
1633      it is for the singlebyte output since here larger numbers have to
1634      be encoded to make it possible to emit the value as a byte
1635      string.  */
1636   for (i = 0; i < nrules; ++i)
1637     mbact[i] = 2;
1638   wcact = 2;
1639   mbseqact = 0;
1640   wcseqact = 0;
1641   runp = collate->start;
1642   while (runp != NULL)
1643     {
1644       /* Determine the order.  */
1645       if (runp->used_in_level != 0)
1646         {
1647           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1648                                                  nrules * sizeof (int));
1649
1650           for (i = 0; i < nrules; ++i)
1651             if ((runp->used_in_level & (1 << i)) != 0)
1652               runp->mborder[i] = mbact[i]++;
1653             else
1654               runp->mborder[i] = 0;
1655         }
1656
1657       if (runp->mbs != NULL)
1658         {
1659           struct element_t **eptr;
1660           struct element_t *lastp = NULL;
1661
1662           /* Find the point where to insert in the list.  */
1663           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1664           while (*eptr != NULL)
1665             {
1666               if ((*eptr)->nmbs < runp->nmbs)
1667                 break;
1668
1669               if ((*eptr)->nmbs == runp->nmbs)
1670                 {
1671                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1672
1673                   if (c == 0)
1674                     {
1675                       /* This should not happen.  It means that we have
1676                          to symbols with the same byte sequence.  It is
1677                          of course an error.  */
1678                       record_error_at_line (0, 0, (*eptr)->file,
1679                                             (*eptr)->line,
1680                                             _("\
1681 symbol `%s' has the same encoding as"), (*eptr)->name);
1682
1683                       record_error_at_line (0, 0, runp->file, runp->line,
1684                                             _("symbol `%s'"), runp->name);
1685                       goto dont_insert;
1686                     }
1687                   else if (c < 0)
1688                     /* Insert it here.  */
1689                     break;
1690                 }
1691
1692               /* To the next entry.  */
1693               lastp = *eptr;
1694               eptr = &(*eptr)->mbnext;
1695             }
1696
1697           /* Set the pointers.  */
1698           runp->mbnext = *eptr;
1699           runp->mblast = lastp;
1700           if (*eptr != NULL)
1701             (*eptr)->mblast = runp;
1702           *eptr = runp;
1703         dont_insert:
1704           ;
1705         }
1706
1707       if (runp->used_in_level)
1708         {
1709           runp->wcorder = wcact++;
1710
1711           /* We take the opportunity to count the elements which have
1712              wide characters.  */
1713           ++nr_wide_elems;
1714         }
1715
1716       if (runp->is_character)
1717         {
1718           if (runp->nmbs == 1)
1719             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1720
1721           runp->wcseqorder = wcseqact++;
1722         }
1723       else if (runp->mbs != NULL && runp->weights != NULL)
1724         /* This is for collation elements.  */
1725         runp->wcseqorder = wcseqact++;
1726
1727       /* Up to the next entry.  */
1728       runp = runp->next;
1729     }
1730
1731   /* Find out whether any of the `mbheads' entries is unset.  In this
1732      case we use the UNDEFINED entry.  */
1733   for (i = 1; i < 256; ++i)
1734     if (collate->mbheads[i] == NULL)
1735       {
1736         need_undefined = 1;
1737         collate->mbheads[i] = &collate->undefined;
1738       }
1739
1740   /* Now to the wide character case.  */
1741   collate->wcheads.p = 6;
1742   collate->wcheads.q = 10;
1743   wchead_table_init (&collate->wcheads);
1744
1745   collate->wcseqorder.p = 6;
1746   collate->wcseqorder.q = 10;
1747   collseq_table_init (&collate->wcseqorder);
1748
1749   /* Start adding.  */
1750   runp = collate->start;
1751   while (runp != NULL)
1752     {
1753       if (runp->wcs != NULL)
1754         {
1755           struct element_t *e;
1756           struct element_t **eptr;
1757           struct element_t *lastp;
1758
1759           /* Insert the collation sequence value.  */
1760           if (runp->is_character)
1761             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1762                                runp->wcseqorder);
1763
1764           /* Find the point where to insert in the list.  */
1765           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1766           eptr = &e;
1767           lastp = NULL;
1768           while (*eptr != NULL)
1769             {
1770               if ((*eptr)->nwcs < runp->nwcs)
1771                 break;
1772
1773               if ((*eptr)->nwcs == runp->nwcs)
1774                 {
1775                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1776                                    (wchar_t *) runp->wcs, runp->nwcs);
1777
1778                   if (c == 0)
1779                     {
1780                       /* This should not happen.  It means that we have
1781                          two symbols with the same byte sequence.  It is
1782                          of course an error.  */
1783                       record_error_at_line (0, 0, (*eptr)->file,
1784                                             (*eptr)->line,
1785                                             _("\
1786 symbol `%s' has the same encoding as"), (*eptr)->name);
1787
1788                       record_error_at_line (0, 0, runp->file, runp->line,
1789                                             _("symbol `%s'"), runp->name);
1790                       goto dont_insertwc;
1791                     }
1792                   else if (c < 0)
1793                     /* Insert it here.  */
1794                     break;
1795                 }
1796
1797               /* To the next entry.  */
1798               lastp = *eptr;
1799               eptr = &(*eptr)->wcnext;
1800             }
1801
1802           /* Set the pointers.  */
1803           runp->wcnext = *eptr;
1804           runp->wclast = lastp;
1805           if (*eptr != NULL)
1806             (*eptr)->wclast = runp;
1807           *eptr = runp;
1808           if (eptr == &e)
1809             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1810         dont_insertwc:
1811           ;
1812         }
1813
1814       /* Up to the next entry.  */
1815       runp = runp->next;
1816     }
1817
1818   /* Now determine whether the UNDEFINED entry is needed and if yes,
1819      whether it was defined.  */
1820   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1821   if (collate->undefined.file == NULL)
1822     {
1823       if (need_undefined)
1824         {
1825           /* This seems not to be enforced by recent standards.  Don't
1826              emit an error, simply append UNDEFINED at the end.  */
1827           collate->undefined.mborder =
1828             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1829
1830           for (i = 0; i < nrules; ++i)
1831             collate->undefined.mborder[i] = mbact[i]++;
1832         }
1833
1834       /* In any case we will need the definition for the wide character
1835          case.  But we will not complain that it is missing since the
1836          specification strangely enough does not seem to account for
1837          this.  */
1838       collate->undefined.wcorder = wcact++;
1839     }
1840
1841   /* Finally, try to unify the rules for the sections.  Whenever the rules
1842      for a section are the same as those for another section give the
1843      ruleset the same index.  Since there are never many section we can
1844      use an O(n^2) algorithm here.  */
1845   sect = collate->sections;
1846   while (sect != NULL && sect->rules == NULL)
1847     sect = sect->next;
1848
1849   /* Bail out if we have no sections because of earlier errors.  */
1850   if (sect == NULL)
1851     {
1852       record_error (EXIT_FAILURE, 0, _("too many errors; giving up"));
1853       return;
1854     }
1855
1856   ruleidx = 0;
1857   do
1858     {
1859       struct section_list *osect = collate->sections;
1860
1861       while (osect != sect)
1862         if (osect->rules != NULL
1863             && memcmp (osect->rules, sect->rules,
1864                        nrules * sizeof (osect->rules[0])) == 0)
1865           break;
1866         else
1867           osect = osect->next;
1868
1869       if (osect == sect)
1870         sect->ruleidx = ruleidx++;
1871       else
1872         sect->ruleidx = osect->ruleidx;
1873
1874       /* Next section.  */
1875       do
1876         sect = sect->next;
1877       while (sect != NULL && sect->rules == NULL);
1878     }
1879   while (sect != NULL);
1880   /* We are currently not prepared for more than 128 rulesets.  But this
1881      should never really be a problem.  */
1882   assert (ruleidx <= 128);
1883 }
1884
1885
1886 static int32_t
1887 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1888                struct element_t *elem)
1889 {
1890   size_t cnt;
1891   int32_t retval;
1892
1893   /* Optimize the use of UNDEFINED.  */
1894   if (elem == &collate->undefined)
1895     /* The weights are already inserted.  */
1896     return 0;
1897
1898   /* This byte can start exactly one collation element and this is
1899      a single byte.  We can directly give the index to the weights.  */
1900   retval = obstack_object_size (pool);
1901
1902   /* Construct the weight.  */
1903   for (cnt = 0; cnt < nrules; ++cnt)
1904     {
1905       char buf[elem->weights[cnt].cnt * 7];
1906       int len = 0;
1907       int i;
1908
1909       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1910         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1911         if (elem->weights[cnt].w[i] != NULL)
1912           len += utf8_encode (&buf[len],
1913                               elem->weights[cnt].w[i]->mborder[cnt]);
1914
1915       /* And add the buffer content.  */
1916       obstack_1grow (pool, len);
1917       obstack_grow (pool, buf, len);
1918     }
1919
1920   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1921 }
1922
1923
1924 static int32_t
1925 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1926                  struct element_t *elem)
1927 {
1928   size_t cnt;
1929   int32_t retval;
1930
1931   /* Optimize the use of UNDEFINED.  */
1932   if (elem == &collate->undefined)
1933     /* The weights are already inserted.  */
1934     return 0;
1935
1936   /* This byte can start exactly one collation element and this is
1937      a single byte.  We can directly give the index to the weights.  */
1938   retval = obstack_object_size (pool) / sizeof (int32_t);
1939
1940   /* Construct the weight.  */
1941   for (cnt = 0; cnt < nrules; ++cnt)
1942     {
1943       int32_t buf[elem->weights[cnt].cnt];
1944       int i;
1945       int32_t j;
1946
1947       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1948         if (elem->weights[cnt].w[i] != NULL)
1949           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1950
1951       /* And add the buffer content.  */
1952       obstack_int32_grow (pool, j);
1953
1954       obstack_grow (pool, buf, j * sizeof (int32_t));
1955       maybe_swap_uint32_obstack (pool, j);
1956     }
1957
1958   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1959 }
1960
/* Context for add_to_tablewc.  That function is handed to
   wchead_table_iterate as a callback and receives only the character
   and the head of its element list, so collate_output stores everything
   else the callback needs here before starting the iteration and clears
   the structure again afterwards.

   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool to which output_weightwc appends the wide character weights.  */
  struct obstack *weightpool;
  /* Pool receiving the sequence records for characters which cannot be
     described by a single weight index.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive
     sequences.  */
  struct obstack *indpool;
  /* The collation data which is being written.  */
  struct locale_collate_t *collate;
  /* Table which maps a wide character either to a weight index or to
     the negated word offset of its records in `extrapool'.  */
  struct collidx_table *tablewc;
} atwc;

/* Prototype; the definition follows immediately.  */
static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1972
1973 static void
1974 add_to_tablewc (uint32_t ch, struct element_t *runp)
1975 {
1976   if (runp->wcnext == NULL && runp->nwcs == 1)
1977     {
1978       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1979                                            runp);
1980       collidx_table_add (atwc.tablewc, ch, weigthidx);
1981     }
1982   else
1983     {
1984       /* As for the singlebyte table, we recognize sequences and
1985          compress them.  */
1986
1987       collidx_table_add (atwc.tablewc, ch,
1988                          -(obstack_object_size (atwc.extrapool)
1989                          / sizeof (uint32_t)));
1990
1991       do
1992         {
1993           /* Store the current index in the weight table.  We know that
1994              the current position in the `extrapool' is aligned on a
1995              32-bit address.  */
1996           int32_t weightidx;
1997           int added;
1998
1999           /* Find out wether this is a single entry or we have more than
2000              one consecutive entry.  */
2001           if (runp->wcnext != NULL
2002               && runp->nwcs == runp->wcnext->nwcs
2003               && wmemcmp ((wchar_t *) runp->wcs,
2004                           (wchar_t *)runp->wcnext->wcs,
2005                           runp->nwcs - 1) == 0
2006               && (runp->wcs[runp->nwcs - 1]
2007                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2008             {
2009               int i;
2010               struct element_t *series_startp = runp;
2011               struct element_t *curp;
2012
2013               /* Now add first the initial byte sequence.  */
2014               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2015               if (sizeof (int32_t) == sizeof (int))
2016                 obstack_make_room (atwc.extrapool, added);
2017
2018               /* More than one consecutive entry.  We mark this by having
2019                  a negative index into the indirect table.  */
2020               obstack_int32_grow_fast (atwc.extrapool,
2021                                        -(obstack_object_size (atwc.indpool)
2022                                          / sizeof (int32_t)));
2023               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2024
2025               do
2026                 runp = runp->wcnext;
2027               while (runp->wcnext != NULL
2028                      && runp->nwcs == runp->wcnext->nwcs
2029                      && wmemcmp ((wchar_t *) runp->wcs,
2030                                  (wchar_t *)runp->wcnext->wcs,
2031                                  runp->nwcs - 1) == 0
2032                      && (runp->wcs[runp->nwcs - 1]
2033                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2034
2035               /* Now walk backward from here to the beginning.  */
2036               curp = runp;
2037
2038               for (i = 1; i < runp->nwcs; ++i)
2039                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2040
2041               /* Now find the end of the consecutive sequence and
2042                  add all the indeces in the indirect pool.  */
2043               do
2044                 {
2045                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2046                                                curp);
2047                   obstack_int32_grow (atwc.indpool, weightidx);
2048
2049                   curp = curp->wclast;
2050                 }
2051               while (curp != series_startp);
2052
2053               /* Add the final weight.  */
2054               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2055                                            curp);
2056               obstack_int32_grow (atwc.indpool, weightidx);
2057
2058               /* And add the end byte sequence.  Without length this
2059                  time.  */
2060               for (i = 1; i < curp->nwcs; ++i)
2061                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2062             }
2063           else
2064             {
2065               /* A single entry.  Simply add the index and the length and
2066                  string (except for the first character which is already
2067                  tested for).  */
2068               int i;
2069
2070               /* Output the weight info.  */
2071               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2072                                            runp);
2073
2074               assert (runp->nwcs > 0);
2075               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2076               if (sizeof (int) == sizeof (int32_t))
2077                 obstack_make_room (atwc.extrapool, added);
2078
2079               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2080               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2081               for (i = 1; i < runp->nwcs; ++i)
2082                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2083             }
2084
2085           /* Next entry.  */
2086           runp = runp->wcnext;
2087         }
2088       while (runp != NULL);
2089     }
2090 }
2091
/* Write the LC_COLLATE category of LOCALE to the file named by
   OUTPUT_PATH.

   The items are added to the locale file in this order: the number of
   rules; the rule set table; the 8-bit lookup table followed by its
   weight, extra and indirect pools; three empty items; the wide
   character lookup table followed by its weight, extra and indirect
   pools; the hash table of collation element names (size, table and
   data); the multibyte and the wide character collation sequence
   tables; and finally the code set name taken from CHARMAP.

   If LOCALE has no collation data only the rule count is written,
   followed by placeholders for all remaining items.  */
void
collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
                const char *output_path)
{
  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
  struct locale_file file;
  size_t ch;
  int32_t tablemb[256];
  struct obstack weightpool;
  struct obstack extrapool;
  struct obstack indirectpool;
  struct section_list *sect;
  struct collidx_table tablewc;
  uint32_t elem_size;
  uint32_t *elem_table;
  int i;
  struct element_t *runp;

  init_locale_data (&file, nelems);
  add_locale_uint32 (&file, nrules);

  /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
  if (collate == NULL)
    {
      size_t idx;
      /* Item zero (the rule count) has been added above already.  */
      for (idx = 1; idx < nelems; idx++)
        {
          /* The words have to be handled specially.  */
          if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
            add_locale_uint32 (&file, 0);
          else
            add_locale_empty (&file);
        }
      write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
      return;
    }

  obstack_init (&weightpool);
  obstack_init (&extrapool);
  obstack_init (&indirectpool);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Prepare the ruleset table.  Only the first section with a given
     rule set index contributes its rules; I counts the rule sets
     written so far.  */
  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
    if (sect->rules != NULL && sect->ruleidx == i)
      {
        int j;

        obstack_make_room (&weightpool, nrules);

        for (j = 0; j < nrules; ++j)
          obstack_1grow_fast (&weightpool, sect->rules[j]);
        ++i;
      }
  /* And align the output.  */
  i = (nrules * i) % LOCFILE_ALIGN;
  if (i > 0)
    do
      obstack_1grow (&weightpool, '\0');
    while (++i < LOCFILE_ALIGN);

  /* NOTE(review): the pools are filled again after each
     add_locale_raw_obstack call, so that function presumably finishes
     the current object of the obstack -- confirm in locfile.c.  */
  add_locale_raw_obstack (&file, &weightpool);

  /* Generate the 8-bit table.  Walk through the lists of sequences
     starting with the same byte and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same byte we have to use extra indirection.

     First add a record for the NUL byte.  This entry will never be used
     so it does not matter.  */
  tablemb[0] = 0;

  /* Now insert the `UNDEFINED' value if it is used.  Since this value
     will probably be used more than once it is good to store the
     weights only once.
     NOTE(review): output_weight returns zero without writing anything
     when it is passed &collate->undefined, so this call does not add
     any data to the pool.  */
  if (collate->undefined.used_in_level != 0)
    output_weight (&weightpool, collate, &collate->undefined);

  for (ch = 1; ch < 256; ++ch)
    if (collate->mbheads[ch]->mbnext == NULL
        && collate->mbheads[ch]->nmbs <= 1)
      {
        /* Exactly one element of at most one byte: the table entry is
           the weight index itself.  */
        tablemb[ch] = output_weight (&weightpool, collate,
                                     collate->mbheads[ch]);
      }
    else
      {
        /* The entries in the list are sorted by length and then
           alphabetically.  This is the order in which we will add the
           elements to the collation table.  This allows simply walking
           the table in sequence and stopping at the first matching
           entry.  Since the longer sequences are coming first in the
           list they have the possibility to match first, just as it
           has to be.  In the worst case we are walking to the end of
           the list where we put, if no singlebyte sequence is defined
           in the locale definition, the weights for UNDEFINED.

           To reduce the length of the search list we compress them a bit.
           This happens by collecting sequences of consecutive byte
           sequences in one entry (having a begin and an end byte
           sequence) and adding only one index into the weight table.  We
           can find the consecutive entries since they are also
           consecutive in the list.  */
        /* Note that this RUNP shadows the variable of the same name
           declared at function scope.  */
        struct element_t *runp = collate->mbheads[ch];
        struct element_t *lastp;

        assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

        /* A negative entry is the negated byte offset of the first
           record in the extra pool.  */
        tablemb[ch] = -obstack_object_size (&extrapool);

        do
          {
            /* Store the current index in the weight table.  We know that
               the current position in the `extrapool' is aligned on a
               32-bit address.  */
            int32_t weightidx;
            int added;

            /* Find out whether this is a single entry or we have more than
               one consecutive entry.  */
            if (runp->mbnext != NULL
                && runp->nmbs == runp->mbnext->nmbs
                && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
                && (runp->mbs[runp->nmbs - 1]
                    == runp->mbnext->mbs[runp->nmbs - 1] + 1))
              {
                /* This I shadows the variable at function scope.  */
                int i;
                struct element_t *series_startp = runp;
                struct element_t *curp;

                /* Compute how much space we will need: the index, the
                   length byte and two byte sequences, rounded up so that
                   the padding added below is covered as well.  */
                added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
                                          + 2 * (runp->nmbs - 1));
                assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
                obstack_make_room (&extrapool, added);

                /* More than one consecutive entry.  We mark this by having
                   a negative index into the indirect table.  */
                obstack_int32_grow_fast (&extrapool,
                                         -(obstack_object_size (&indirectpool)
                                           / sizeof (int32_t)));

                /* Now search first the end of the series.  */
                do
                  runp = runp->mbnext;
                while (runp->mbnext != NULL
                       && runp->nmbs == runp->mbnext->nmbs
                       && memcmp (runp->mbs, runp->mbnext->mbs,
                                  runp->nmbs - 1) == 0
                       && (runp->mbs[runp->nmbs - 1]
                           == runp->mbnext->mbs[runp->nmbs - 1] + 1));

                /* Now walk backward from here to the beginning.  */
                curp = runp;

                /* The length minus one must fit into the single byte
                   written next.  */
                assert (runp->nmbs <= 256);
                obstack_1grow_fast (&extrapool, curp->nmbs - 1);
                for (i = 1; i < curp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, curp->mbs[i]);

                /* Now walk back to the start of the consecutive sequence
                   and add all the indices to the indirect pool.  */
                do
                  {
                    weightidx = output_weight (&weightpool, collate, curp);
                    obstack_int32_grow (&indirectpool, weightidx);

                    curp = curp->mblast;
                  }
                while (curp != series_startp);

                /* Add the final weight.  */
                weightidx = output_weight (&weightpool, collate, curp);
                obstack_int32_grow (&indirectpool, weightidx);

                /* And add the end byte sequence.  Without length this
                   time.  */
                for (i = 1; i < curp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, curp->mbs[i]);
              }
            else
              {
                /* A single entry.  Simply add the index and the length and
                   string (except for the first character which is already
                   tested for).  */
                int i;

                /* Output the weight info.  */
                weightidx = output_weight (&weightpool, collate, runp);

                added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
                                          + runp->nmbs - 1);
                assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
                obstack_make_room (&extrapool, added);

                obstack_int32_grow_fast (&extrapool, weightidx);
                /* The length minus one must fit into one byte.  */
                assert (runp->nmbs <= 256);
                obstack_1grow_fast (&extrapool, runp->nmbs - 1);

                for (i = 1; i < runp->nmbs; ++i)
                  obstack_1grow_fast (&extrapool, runp->mbs[i]);
              }

            /* Add alignment bytes if necessary.  The room for them is
               part of ADDED.  */
            while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
              obstack_1grow_fast (&extrapool, '\0');

            /* Next entry.  */
            lastp = runp;
            runp = runp->mbnext;
          }
        while (runp != NULL);

        assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

        /* If the final entry in the list is not a single character we
           add an UNDEFINED entry here.  */
        if (lastp->nmbs != 1)
          {
            int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
            obstack_make_room (&extrapool, added);

            obstack_int32_grow_fast (&extrapool, 0);
            /* XXX What rule? We just pick the first.  */
            obstack_1grow_fast (&extrapool, 0);
            /* Length is zero.  */
            obstack_1grow_fast (&extrapool, 0);

            /* Add alignment bytes if necessary.  */
            while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
              obstack_1grow_fast (&extrapool, '\0');
          }
      }

  /* Add padding to the tables if necessary.  */
  while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
    obstack_1grow (&weightpool, 0);

  /* Now add the four tables.  */
  add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Now the same for the wide character table.  We need to store some
     more information here.  */
  add_locale_empty (&file);
  add_locale_empty (&file);
  add_locale_empty (&file);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Now insert the `UNDEFINED' value if it is used.  Since this value
     will probably be used more than once it is good to store the
     weights only once.
     NOTE(review): output_weightwc returns zero without writing anything
     when it is passed &collate->undefined, so this only checks that
     the returned index is zero.  */
  if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
    abort ();

  /* Generate the table.  Walk through the lists of sequences starting
     with the same wide character and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same wide character we have to use extra indirection.  */
  tablewc.p = 6;
  tablewc.q = 10;
  collidx_table_init (&tablewc);

  /* The iteration callback gets no context argument; pass what it
     needs through the file-level `atwc' variable.  */
  atwc.weightpool = &weightpool;
  atwc.extrapool = &extrapool;
  atwc.indpool = &indirectpool;
  atwc.collate = collate;
  atwc.tablewc = &tablewc;

  wchead_table_iterate (&collate->wcheads, add_to_tablewc);

  /* Do not leave pointers to our local variables behind.  */
  memset (&atwc, 0, sizeof (atwc));

  /* Now add the four tables.  */
  add_locale_collidx_table (&file, &tablewc);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Finally write the table with collation element names out.  It is
     a hash table with a simple function which gets the name of the
     character as the input.  One character might have many names.  The
     value associated with the name is an index into the weight table
     where we are then interested in the first-level weight value.

     To determine how large the table should be we are counting the
     elements we have to put in.  Since we are using internal chaining
     using a secondary hash function we have to make the table a bit
     larger to avoid extremely long search times.  The table gets 50%
     more slots than there are entries, rounded up to a prime number.  */
  elem_size = 0;
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
        /* Yep, the element really counts.  */
        ++elem_size;

      runp = runp->next;
    }
  /* Add 50% and find the next prime number.  */
  elem_size = next_prime (elem_size + (elem_size >> 1));

  /* Allocate the table.  Each entry consists of two words: the hash
     value and an index in a secondary table which provides the index
     into the weight table and the string itself (so that a match can
     be determined).  The table is a separate object allocated from
     `extrapool'; the data grown in the pool below forms a new
     object.  */
  elem_table = (uint32_t *) obstack_alloc (&extrapool,
                                           elem_size * 2 * sizeof (uint32_t));
  memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));

  /* Now add the elements.  The condition is the same as in the
     counting loop above.  */
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
        {
          /* Compute the hash value of the name.  */
          uint32_t namelen = strlen (runp->name);
          uint32_t hash = elem_hash (runp->name, namelen);
          size_t idx = hash % elem_size;
#ifndef NDEBUG
          /* Only needed by the assertion in the probing loop.  */
          size_t start_idx = idx;
#endif

          /* A hash value of zero in the first word marks a free slot.  */
          if (elem_table[idx * 2] != 0)
            {
              /* The spot is already taken.  Try iterating using the value
                 from the secondary hashing function.  */
              size_t iter = hash % (elem_size - 2) + 1;

              do
                {
                  idx += iter;
                  if (idx >= elem_size)
                    idx -= elem_size;
                  assert (idx != start_idx);
                }
              while (elem_table[idx * 2] != 0);
            }
          /* This is the spot where we will insert the value.  The second
             word is the byte offset of the record in the data written
             below.  */
          elem_table[idx * 2] = hash;
          elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);

          /* The string itself including length.  */
          obstack_1grow (&extrapool, namelen);
          obstack_grow (&extrapool, runp->name, namelen);

          /* And the multibyte representation.  */
          obstack_1grow (&extrapool, runp->nmbs);
          obstack_grow (&extrapool, runp->mbs, runp->nmbs);

          /* And align again to 32 bits.  At most three padding bytes are
             needed; the string literal provides three NUL bytes when its
             terminator is counted.  */
          if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
            obstack_grow (&extrapool, "\0\0",
                          (sizeof (int32_t)
                           - ((1 + namelen + 1 + runp->nmbs)
                              % sizeof (int32_t))));

          /* Now some 32-bit values: multibyte collation sequence,
             wide char string (including length), and wide char
             collation sequence.  */
          obstack_int32_grow (&extrapool, runp->mbseqorder);

          obstack_int32_grow (&extrapool, runp->nwcs);
          obstack_grow (&extrapool, runp->wcs,
                        runp->nwcs * sizeof (uint32_t));
          maybe_swap_uint32_obstack (&extrapool, runp->nwcs);

          obstack_int32_grow (&extrapool, runp->wcseqorder);
        }

      runp = runp->next;
    }

  /* Prepare to write out this data.  */
  add_locale_uint32 (&file, elem_size);
  add_locale_uint32_array (&file, elem_table, 2 * elem_size);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_data (&file, collate->mbseqorder, 256);
  add_locale_collseq_table (&file, &collate->wcseqorder);
  add_locale_string (&file, charmap->code_set_name);
  write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);

  /* Release everything allocated from the three pools.  */
  obstack_free (&weightpool, NULL);
  obstack_free (&extrapool, NULL);
  obstack_free (&indirectpool, NULL);
}
2492
2493
2494 static enum token_t
2495 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2496          const struct charmap_t *charmap, int to_endif)
2497 {
2498   while (1)
2499     {
2500       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2501       enum token_t nowtok = now->tok;
2502
2503       if (nowtok == tok_eof || nowtok == tok_end)
2504         return nowtok;
2505
2506       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2507         {
2508           lr_error (ldfile, _("%s: nested conditionals not supported"),
2509                     "LC_COLLATE");
2510           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2511           if (nowtok == tok_eof || nowtok == tok_end)
2512             return nowtok;
2513         }
2514       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2515         {
2516           lr_ignore_rest (ldfile, 1);
2517           return nowtok;
2518         }
2519       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2520         {
2521           /* Do not read the rest of the line.  */
2522           return nowtok;
2523         }
2524       else if (nowtok == tok_else)
2525         {
2526           lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2527         }
2528
2529       lr_ignore_rest (ldfile, 0);
2530     }
2531 }
2532
2533
2534 void
2535 collate_read (struct linereader *ldfile, struct localedef_t *result,
2536               const struct charmap_t *charmap, const char *repertoire_name,
2537               int ignore_content)
2538 {
2539   struct repertoire_t *repertoire = NULL;
2540   struct locale_collate_t *collate;
2541   struct token *now;
2542   struct token *arg = NULL;
2543   enum token_t nowtok;
2544   enum token_t was_ellipsis = tok_none;
2545   struct localedef_t *copy_locale = NULL;
2546   /* Parsing state:
2547      0 - start
2548      1 - between `order-start' and `order-end'
2549      2 - after `order-end'
2550      3 - after `reorder-after', waiting for `reorder-end'
2551      4 - after `reorder-end'
2552      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2553      6 - after `reorder-sections-end'
2554   */
2555   int state = 0;
2556
2557   /* Get the repertoire we have to use.  */
2558   if (repertoire_name != NULL)
2559     repertoire = repertoire_read (repertoire_name);
2560
2561   /* The rest of the line containing `LC_COLLATE' must be free.  */
2562   lr_ignore_rest (ldfile, 1);
2563
2564   while (1)
2565     {
2566       do
2567         {
2568           now = lr_token (ldfile, charmap, result, NULL, verbose);
2569           nowtok = now->tok;
2570         }
2571       while (nowtok == tok_eol);
2572
2573       if (nowtok != tok_define)
2574         break;
2575
2576       if (ignore_content)
2577         lr_ignore_rest (ldfile, 0);
2578       else
2579         {
2580           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2581           if (arg->tok != tok_ident)
2582             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2583           else
2584             {
2585               /* Simply add the new symbol.  */
2586               struct name_list *newsym = xmalloc (sizeof (*newsym)
2587                                                   + arg->val.str.lenmb + 1);
2588               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2589               newsym->str[arg->val.str.lenmb] = '\0';
2590               newsym->next = defined;
2591               defined = newsym;
2592
2593               lr_ignore_rest (ldfile, 1);
2594             }
2595         }
2596     }
2597
2598   if (nowtok == tok_copy)
2599     {
2600       now = lr_token (ldfile, charmap, result, NULL, verbose);
2601       if (now->tok != tok_string)
2602         {
2603           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2604
2605         skip_category:
2606           do
2607             now = lr_token (ldfile, charmap, result, NULL, verbose);
2608           while (now->tok != tok_eof && now->tok != tok_end);
2609
2610           if (now->tok != tok_eof
2611               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2612                   now->tok == tok_eof))
2613             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2614           else if (now->tok != tok_lc_collate)
2615             {
2616               lr_error (ldfile, _("\
2617 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2618               lr_ignore_rest (ldfile, 0);
2619             }
2620           else
2621             lr_ignore_rest (ldfile, 1);
2622
2623           return;
2624         }
2625
2626       if (! ignore_content)
2627         {
2628           /* Get the locale definition.  */
2629           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2630                                      repertoire_name, charmap, NULL);
2631           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2632             {
2633               /* Not yet loaded.  So do it now.  */
2634               if (locfile_read (copy_locale, charmap) != 0)
2635                 goto skip_category;
2636             }
2637
2638           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2639             return;
2640         }
2641
2642       lr_ignore_rest (ldfile, 1);
2643
2644       now = lr_token (ldfile, charmap, result, NULL, verbose);
2645       nowtok = now->tok;
2646     }
2647
2648   /* Prepare the data structures.  */
2649   collate_startup (ldfile, result, copy_locale, ignore_content);
2650   collate = result->categories[LC_COLLATE].collate;
2651
2652   while (1)
2653     {
2654       char ucs4buf[10];
2655       char *symstr;
2656       size_t symlen;
2657
2658       /* Of course we don't proceed beyond the end of file.  */
2659       if (nowtok == tok_eof)
2660         break;
2661
2662       /* Ignore empty lines.  */
2663       if (nowtok == tok_eol)
2664         {
2665           now = lr_token (ldfile, charmap, result, NULL, verbose);
2666           nowtok = now->tok;
2667           continue;
2668         }
2669
2670       switch (nowtok)
2671         {
2672         case tok_copy:
2673           /* Allow copying other locales.  */
2674           now = lr_token (ldfile, charmap, result, NULL, verbose);
2675           if (now->tok != tok_string)
2676             goto err_label;
2677
2678           if (! ignore_content)
2679             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2680                          charmap, result);
2681
2682           lr_ignore_rest (ldfile, 1);
2683           break;
2684
2685         case tok_coll_weight_max:
2686           /* Ignore the rest of the line if we don't need the input of
2687              this line.  */
2688           if (ignore_content)
2689             {
2690               lr_ignore_rest (ldfile, 0);
2691               break;
2692             }
2693
2694           if (state != 0)
2695             goto err_label;
2696
2697           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2698           if (arg->tok != tok_number)
2699             goto err_label;
2700           if (collate->col_weight_max != -1)
2701             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2702                       "LC_COLLATE", "col_weight_max");
2703           else
2704             collate->col_weight_max = arg->val.num;
2705           lr_ignore_rest (ldfile, 1);
2706           break;
2707
2708         case tok_section_symbol:
2709           /* Ignore the rest of the line if we don't need the input of
2710              this line.  */
2711           if (ignore_content)
2712             {
2713               lr_ignore_rest (ldfile, 0);
2714               break;
2715             }
2716
2717           if (state != 0)
2718             goto err_label;
2719
2720           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2721           if (arg->tok != tok_bsymbol)
2722             goto err_label;
2723           else if (!ignore_content)
2724             {
2725               /* Check whether this section is already known.  */
2726               struct section_list *known = collate->sections;
2727               while (known != NULL)
2728                 {
2729                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2730                     break;
2731                   known = known->next;
2732                 }
2733
2734               if (known != NULL)
2735                 {
2736                   lr_error (ldfile,
2737                             _("%s: duplicate declaration of section `%s'"),
2738                             "LC_COLLATE", arg->val.str.startmb);
2739                   free (arg->val.str.startmb);
2740                 }
2741               else
2742                 collate->sections = make_seclist_elem (collate,
2743                                                        arg->val.str.startmb,
2744                                                        collate->sections);
2745
2746               lr_ignore_rest (ldfile, known == NULL);
2747             }
2748           else
2749             {
2750               free (arg->val.str.startmb);
2751               lr_ignore_rest (ldfile, 0);
2752             }
2753           break;
2754
2755         case tok_collating_element:
2756           /* Ignore the rest of the line if we don't need the input of
2757              this line.  */
2758           if (ignore_content)
2759             {
2760               lr_ignore_rest (ldfile, 0);
2761               break;
2762             }
2763
2764           if (state != 0 && state != 2)
2765             goto err_label;
2766
2767           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2768           if (arg->tok != tok_bsymbol)
2769             goto err_label;
2770           else
2771             {
2772               const char *symbol = arg->val.str.startmb;
2773               size_t symbol_len = arg->val.str.lenmb;
2774
2775               /* Next the `from' keyword.  */
2776               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2777               if (arg->tok != tok_from)
2778                 {
2779                   free ((char *) symbol);
2780                   goto err_label;
2781                 }
2782
2783               ldfile->return_widestr = 1;
2784               ldfile->translate_strings = 1;
2785
2786               /* Finally the string with the replacement.  */
2787               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2788
2789               ldfile->return_widestr = 0;
2790               ldfile->translate_strings = 0;
2791
2792               if (arg->tok != tok_string)
2793                 goto err_label;
2794
2795               if (!ignore_content && symbol != NULL)
2796                 {
2797                   /* The name is already defined.  */
2798                   if (check_duplicate (ldfile, collate, charmap,
2799                                        repertoire, symbol, symbol_len))
2800                     goto col_elem_free;
2801
2802                   if (arg->val.str.startmb != NULL)
2803                     insert_entry (&collate->elem_table, symbol, symbol_len,
2804                                   new_element (collate,
2805                                                arg->val.str.startmb,
2806                                                arg->val.str.lenmb - 1,
2807                                                arg->val.str.startwc,
2808                                                symbol, symbol_len, 0));
2809                 }
2810               else
2811                 {
2812                 col_elem_free:
2813                   free ((char *) symbol);
2814                   free (arg->val.str.startmb);
2815                   free (arg->val.str.startwc);
2816                 }
2817               lr_ignore_rest (ldfile, 1);
2818             }
2819           break;
2820
2821         case tok_collating_symbol:
2822           /* Ignore the rest of the line if we don't need the input of
2823              this line.  */
2824           if (ignore_content)
2825             {
2826               lr_ignore_rest (ldfile, 0);
2827               break;
2828             }
2829
2830           if (state != 0 && state != 2)
2831             goto err_label;
2832
2833           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2834           if (arg->tok != tok_bsymbol)
2835             goto err_label;
2836           else
2837             {
2838               char *symbol = arg->val.str.startmb;
2839               size_t symbol_len = arg->val.str.lenmb;
2840               char *endsymbol = NULL;
2841               size_t endsymbol_len = 0;
2842               enum token_t ellipsis = tok_none;
2843
2844               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2845               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2846                 {
2847                   ellipsis = arg->tok;
2848
2849                   arg = lr_token (ldfile, charmap, result, repertoire,
2850                                   verbose);
2851                   if (arg->tok != tok_bsymbol)
2852                     {
2853                       free (symbol);
2854                       goto err_label;
2855                     }
2856
2857                   endsymbol = arg->val.str.startmb;
2858                   endsymbol_len = arg->val.str.lenmb;
2859
2860                   lr_ignore_rest (ldfile, 1);
2861                 }
2862               else if (arg->tok != tok_eol)
2863                 {
2864                   free (symbol);
2865                   goto err_label;
2866                 }
2867
2868               if (!ignore_content)
2869                 {
2870                   if (symbol == NULL
2871                       || (ellipsis != tok_none && endsymbol == NULL))
2872                     {
2873                       lr_error (ldfile, _("\
2874 %s: unknown character in collating symbol name"),
2875                                 "LC_COLLATE");
2876                       goto col_sym_free;
2877                     }
2878                   else if (ellipsis == tok_none)
2879                     {
2880                       /* A single symbol, no ellipsis.  */
2881                       if (check_duplicate (ldfile, collate, charmap,
2882                                            repertoire, symbol, symbol_len))
2883                         /* The name is already defined.  */
2884                         goto col_sym_free;
2885
2886                       insert_entry (&collate->sym_table, symbol, symbol_len,
2887                                     new_symbol (collate, symbol, symbol_len));
2888                     }
2889                   else if (symbol_len != endsymbol_len)
2890                     {
2891                     col_sym_inv_range:
2892                       lr_error (ldfile,
2893                                 _("invalid names for character range"));
2894                       goto col_sym_free;
2895                     }
2896                   else
2897                     {
2898                       /* Oh my, we have to handle an ellipsis.  First, as
2899                          usual, determine the common prefix and then
2900                          convert the rest into a range.  */
2901                       size_t prefixlen;
2902                       unsigned long int from;
2903                       unsigned long int to;
2904                       char *endp;
2905
2906                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2907                         if (symbol[prefixlen] != endsymbol[prefixlen])
2908                           break;
2909
2910                       /* Convert the rest into numbers.  */
2911                       symbol[symbol_len] = '\0';
2912                       from = strtoul (&symbol[prefixlen], &endp,
2913                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2914                       if (*endp != '\0')
2915                         goto col_sym_inv_range;
2916
2917                       endsymbol[symbol_len] = '\0';
2918                       to = strtoul (&endsymbol[prefixlen], &endp,
2919                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2920                       if (*endp != '\0')
2921                         goto col_sym_inv_range;
2922
2923                       if (from > to)
2924                         goto col_sym_inv_range;
2925
2926                       /* Now loop over all entries.  */
2927                       while (from <= to)
2928                         {
2929                           char *symbuf;
2930
2931                           symbuf = (char *) obstack_alloc (&collate->mempool,
2932                                                            symbol_len + 1);
2933
2934                           /* Create the name.  */
2935                           sprintf (symbuf,
2936                                    ellipsis == tok_ellipsis2
2937                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2938                                    (int) prefixlen, symbol,
2939                                    (int) (symbol_len - prefixlen), from);
2940
2941                           if (check_duplicate (ldfile, collate, charmap,
2942                                                repertoire, symbuf, symbol_len))
2943                             /* The name is already defined.  */
2944                             goto col_sym_free;
2945
2946                           insert_entry (&collate->sym_table, symbuf,
2947                                         symbol_len,
2948                                         new_symbol (collate, symbuf,
2949                                                     symbol_len));
2950
2951                           /* Increment the counter.  */
2952                           ++from;
2953                         }
2954
2955                       goto col_sym_free;
2956                     }
2957                 }
2958               else
2959                 {
2960                 col_sym_free:
2961                   free (symbol);
2962                   free (endsymbol);
2963                 }
2964             }
2965           break;
2966
2967         case tok_symbol_equivalence:
2968           /* Ignore the rest of the line if we don't need the input of
2969              this line.  */
2970           if (ignore_content)
2971             {
2972               lr_ignore_rest (ldfile, 0);
2973               break;
2974             }
2975
2976           if (state != 0)
2977             goto err_label;
2978
2979           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2980           if (arg->tok != tok_bsymbol)
2981             goto err_label;
2982           else
2983             {
2984               const char *newname = arg->val.str.startmb;
2985               size_t newname_len = arg->val.str.lenmb;
2986               const char *symname;
2987               size_t symname_len;
2988               void *symval;     /* Actually struct symbol_t*  */
2989
2990               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2991               if (arg->tok != tok_bsymbol)
2992                 {
2993                   free ((char *) newname);
2994                   goto err_label;
2995                 }
2996
2997               symname = arg->val.str.startmb;
2998               symname_len = arg->val.str.lenmb;
2999
3000               if (newname == NULL)
3001                 {
3002                   lr_error (ldfile, _("\
3003 %s: unknown character in equivalent definition name"),
3004                             "LC_COLLATE");
3005
3006                 sym_equiv_free:
3007                   free ((char *) newname);
3008                   free ((char *) symname);
3009                   break;
3010                 }
3011               if (symname == NULL)
3012                 {
3013                   lr_error (ldfile, _("\
3014 %s: unknown character in equivalent definition value"),
3015                             "LC_COLLATE");
3016                   goto sym_equiv_free;
3017                 }
3018
3019               /* See whether the symbol name is already defined.  */
3020               if (find_entry (&collate->sym_table, symname, symname_len,
3021                               &symval) != 0)
3022                 {
3023                   lr_error (ldfile, _("\
3024 %s: unknown symbol `%s' in equivalent definition"),
3025                             "LC_COLLATE", symname);
3026                   goto sym_equiv_free;
3027                 }
3028
3029               if (insert_entry (&collate->sym_table,
3030                                 newname, newname_len, symval) < 0)
3031                 {
3032                   lr_error (ldfile, _("\
3033 error while adding equivalent collating symbol"));
3034                   goto sym_equiv_free;
3035                 }
3036
3037               free ((char *) symname);
3038             }
3039           lr_ignore_rest (ldfile, 1);
3040           break;
3041
3042         case tok_script:
3043           /* Ignore the rest of the line if we don't need the input of
3044              this line.  */
3045           if (ignore_content)
3046             {
3047               lr_ignore_rest (ldfile, 0);
3048               break;
3049             }
3050
3051           /* We get told about the scripts we know.  */
3052           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3053           if (arg->tok != tok_bsymbol)
3054             goto err_label;
3055           else
3056             {
3057               struct section_list *runp = collate->known_sections;
3058               char *name;
3059
3060               while (runp != NULL)
3061                 if (strncmp (runp->name, arg->val.str.startmb,
3062                              arg->val.str.lenmb) == 0
3063                     && runp->name[arg->val.str.lenmb] == '\0')
3064                   break;
3065                 else
3066                   runp = runp->def_next;
3067
3068               if (runp != NULL)
3069                 {
3070                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3071                             runp->name);
3072                   lr_ignore_rest (ldfile, 0);
3073                   break;
3074                 }
3075
3076               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3077               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3078               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3079               name[arg->val.str.lenmb] = '\0';
3080               runp->name = name;
3081
3082               runp->def_next = collate->known_sections;
3083               collate->known_sections = runp;
3084             }
3085           lr_ignore_rest (ldfile, 1);
3086           break;
3087
3088         case tok_order_start:
3089           /* Ignore the rest of the line if we don't need the input of
3090              this line.  */
3091           if (ignore_content)
3092             {
3093               lr_ignore_rest (ldfile, 0);
3094               break;
3095             }
3096
3097           if (state != 0 && state != 1 && state != 2)
3098             goto err_label;
3099           state = 1;
3100
3101           /* The 14652 draft does not specify whether all `order_start' lines
3102              must contain the same number of sort-rules, but 14651 does.  So
3103              we require this here as well.  */
3104           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3105           if (arg->tok == tok_bsymbol)
3106             {
3107               /* This better should be a section name.  */
3108               struct section_list *sp = collate->known_sections;
3109               while (sp != NULL
3110                      && (sp->name == NULL
3111                          || strncmp (sp->name, arg->val.str.startmb,
3112                                      arg->val.str.lenmb) != 0
3113                          || sp->name[arg->val.str.lenmb] != '\0'))
3114                 sp = sp->def_next;
3115
3116               if (sp == NULL)
3117                 {
3118                   lr_error (ldfile, _("\
3119 %s: unknown section name `%.*s'"),
3120                             "LC_COLLATE", (int) arg->val.str.lenmb,
3121                             arg->val.str.startmb);
3122                   /* We use the error section.  */
3123                   collate->current_section = &collate->error_section;
3124
3125                   if (collate->error_section.first == NULL)
3126                     {
3127                       /* Insert &collate->error_section at the end of
3128                          the collate->sections list.  */
3129                       if (collate->sections == NULL)
3130                         collate->sections = &collate->error_section;
3131                       else
3132                         {
3133                           sp = collate->sections;
3134                           while (sp->next != NULL)
3135                             sp = sp->next;
3136
3137                           sp->next = &collate->error_section;
3138                         }
3139                       collate->error_section.next = NULL;
3140                     }
3141                 }
3142               else
3143                 {
3144                   /* One should not be allowed to open the same
3145                      section twice.  */
3146                   if (sp->first != NULL)
3147                     lr_error (ldfile, _("\
3148 %s: multiple order definitions for section `%s'"),
3149                               "LC_COLLATE", sp->name);
3150                   else
3151                     {
3152                       /* Insert sp in the collate->sections list,
3153                          right after collate->current_section.  */
3154                       if (collate->current_section != NULL)
3155                         {
3156                           sp->next = collate->current_section->next;
3157                           collate->current_section->next = sp;
3158                         }
3159                       else if (collate->sections == NULL)
3160                         /* This is the first section to be defined.  */
3161                         collate->sections = sp;
3162
3163                       collate->current_section = sp;
3164                     }
3165
3166                   /* Next should come the end of the line or a semicolon.  */
3167                   arg = lr_token (ldfile, charmap, result, repertoire,
3168                                   verbose);
3169                   if (arg->tok == tok_eol)
3170                     {
3171                       uint32_t cnt;
3172
3173                       /* This means we have exactly one rule: `forward'.  */
3174                       if (nrules > 1)
3175                         lr_error (ldfile, _("\
3176 %s: invalid number of sorting rules"),
3177                                   "LC_COLLATE");
3178                       else
3179                         nrules = 1;
3180                       sp->rules = obstack_alloc (&collate->mempool,
3181                                                  (sizeof (enum coll_sort_rule)
3182                                                   * nrules));
3183                       for (cnt = 0; cnt < nrules; ++cnt)
3184                         sp->rules[cnt] = sort_forward;
3185
3186                       /* Next line.  */
3187                       break;
3188                     }
3189
3190                   /* Get the next token.  */
3191                   arg = lr_token (ldfile, charmap, result, repertoire,
3192                                   verbose);
3193                 }
3194             }
3195           else
3196             {
3197               /* There is no section symbol.  Therefore we use the unnamed
3198                  section.  */
3199               collate->current_section = &collate->unnamed_section;
3200
3201               if (collate->unnamed_section_defined)
3202                 lr_error (ldfile, _("\
3203 %s: multiple order definitions for unnamed section"),
3204                           "LC_COLLATE");
3205               else
3206                 {
3207                   /* Insert &collate->unnamed_section at the beginning of
3208                      the collate->sections list.  */
3209                   collate->unnamed_section.next = collate->sections;
3210                   collate->sections = &collate->unnamed_section;
3211                   collate->unnamed_section_defined = true;
3212                 }
3213             }
3214
3215           /* Now read the direction names.  */
3216           read_directions (ldfile, arg, charmap, repertoire, result);
3217
3218           /* From now we need the strings untranslated.  */
3219           ldfile->translate_strings = 0;
3220           break;
3221
3222         case tok_order_end:
3223           /* Ignore the rest of the line if we don't need the input of
3224              this line.  */
3225           if (ignore_content)
3226             {
3227               lr_ignore_rest (ldfile, 0);
3228               break;
3229             }
3230
3231           if (state != 1)
3232             goto err_label;
3233
3234           /* Handle ellipsis at end of list.  */
3235           if (was_ellipsis != tok_none)
3236             {
3237               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3238                                repertoire, result);
3239               was_ellipsis = tok_none;
3240             }
3241
3242           state = 2;
3243           lr_ignore_rest (ldfile, 1);
3244           break;
3245
3246         case tok_reorder_after:
3247           /* Ignore the rest of the line if we don't need the input of
3248              this line.  */
3249           if (ignore_content)
3250             {
3251               lr_ignore_rest (ldfile, 0);
3252               break;
3253             }
3254
3255           if (state == 1)
3256             {
3257               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3258                         "LC_COLLATE");
3259               state = 2;
3260
3261               /* Handle ellipsis at end of list.  */
3262               if (was_ellipsis != tok_none)
3263                 {
3264                   handle_ellipsis (ldfile, arg->val.str.startmb,
3265                                    arg->val.str.lenmb, was_ellipsis, charmap,
3266                                    repertoire, result);
3267                   was_ellipsis = tok_none;
3268                 }
3269             }
3270           else if (state == 0 && copy_locale == NULL)
3271             goto err_label;
3272           else if (state != 0 && state != 2 && state != 3)
3273             goto err_label;
3274           state = 3;
3275
3276           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3277           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3278             {
3279               /* Find this symbol in the sequence table.  */
3280               char ucsbuf[10];
3281               char *startmb;
3282               size_t lenmb;
3283               struct element_t *insp;
3284               int no_error = 1;
3285               void *ptr;
3286
3287               if (arg->tok == tok_bsymbol)
3288                 {
3289                   startmb = arg->val.str.startmb;
3290                   lenmb = arg->val.str.lenmb;
3291                 }
3292               else
3293                 {
3294                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3295                   startmb = ucsbuf;
3296                   lenmb = 9;
3297                 }
3298
3299               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3300                 /* Yes, the symbol exists.  Simply point the cursor
3301                    to it.  */
3302                 collate->cursor = (struct element_t *) ptr;
3303               else
3304                 {
3305                   struct symbol_t *symbp;
3306                   void *ptr;
3307
3308                   if (find_entry (&collate->sym_table, startmb, lenmb,
3309                                   &ptr) == 0)
3310                     {
3311                       symbp = ptr;
3312
3313                       if (symbp->order->last != NULL
3314                           || symbp->order->next != NULL)
3315                         collate->cursor = symbp->order;
3316                       else
3317                         {
3318                           /* This is a collating symbol but its position
3319                              is not yet defined.  */
3320                           lr_error (ldfile, _("\
3321 %s: order for collating symbol %.*s not yet defined"),
3322                                     "LC_COLLATE", (int) lenmb, startmb);
3323                           collate->cursor = NULL;
3324                           no_error = 0;
3325                         }
3326                     }
3327                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3328                                        &ptr) == 0)
3329                     {
3330                       insp = (struct element_t *) ptr;
3331
3332                       if (insp->last != NULL || insp->next != NULL)
3333                         collate->cursor = insp;
3334                       else
3335                         {
3336                           /* This is a collating element but its position
3337                              is not yet defined.  */
3338                           lr_error (ldfile, _("\
3339 %s: order for collating element %.*s not yet defined"),
3340                                     "LC_COLLATE", (int) lenmb, startmb);
3341                           collate->cursor = NULL;
3342                           no_error = 0;
3343                         }
3344                     }
3345                   else
3346                     {
3347                       /* This is bad.  The symbol after which we have to
3348                          insert does not exist.  */
3349                       lr_error (ldfile, _("\
3350 %s: cannot reorder after %.*s: symbol not known"),
3351                                 "LC_COLLATE", (int) lenmb, startmb);
3352                       collate->cursor = NULL;
3353                       no_error = 0;
3354                     }
3355                 }
3356
3357               lr_ignore_rest (ldfile, no_error);
3358             }
3359           else
3360             /* This must not happen.  */
3361             goto err_label;
3362           break;
3363
3364         case tok_reorder_end:
3365           /* Ignore the rest of the line if we don't need the input of
3366              this line.  */
3367           if (ignore_content)
3368             break;
3369
3370           if (state != 3)
3371             goto err_label;
3372           state = 4;
3373           lr_ignore_rest (ldfile, 1);
3374           break;
3375
3376         case tok_reorder_sections_after:
3377           /* Ignore the rest of the line if we don't need the input of
3378              this line.  */
3379           if (ignore_content)
3380             {
3381               lr_ignore_rest (ldfile, 0);
3382               break;
3383             }
3384
3385           if (state == 1)
3386             {
3387               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3388                         "LC_COLLATE");
3389               state = 2;
3390
3391               /* Handle ellipsis at end of list.  */
3392               if (was_ellipsis != tok_none)
3393                 {
3394                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3395                                    repertoire, result);
3396                   was_ellipsis = tok_none;
3397                 }
3398             }
3399           else if (state == 3)
3400             {
3401               record_error (0, 0, _("\
3402 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3403               state = 4;
3404             }
3405           else if (state != 2 && state != 4)
3406             goto err_label;
3407           state = 5;
3408
3409           /* Get the name of the sections we are adding after.  */
3410           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3411           if (arg->tok == tok_bsymbol)
3412             {
3413               /* Now find a section with this name.  */
3414               struct section_list *runp = collate->sections;
3415
3416               while (runp != NULL)
3417                 {
3418                   if (runp->name != NULL
3419                       && strlen (runp->name) == arg->val.str.lenmb
3420                       && memcmp (runp->name, arg->val.str.startmb,
3421                                  arg->val.str.lenmb) == 0)
3422                     break;
3423
3424                   runp = runp->next;
3425                 }
3426
3427               if (runp != NULL)
3428                 collate->current_section = runp;
3429               else
3430                 {
3431                   /* This is bad.  The section after which we have to
3432                      reorder does not exist.  Therefore we cannot
3433                      process the whole rest of this reorder
3434                      specification.  */
3435                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3436                             "LC_COLLATE", (int) arg->val.str.lenmb,
3437                             arg->val.str.startmb);
3438
3439                   do
3440                     {
3441                       lr_ignore_rest (ldfile, 0);
3442
3443                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3444                     }
3445                   while (now->tok == tok_reorder_sections_after
3446                          || now->tok == tok_reorder_sections_end
3447                          || now->tok == tok_end);
3448
3449                   /* Process the token we just saw.  */
3450                   nowtok = now->tok;
3451                   continue;
3452                 }
3453             }
3454           else
3455             /* This must not happen.  */
3456             goto err_label;
3457           break;
3458
3459         case tok_reorder_sections_end:
3460           /* Ignore the rest of the line if we don't need the input of
3461              this line.  */
3462           if (ignore_content)
3463             break;
3464
3465           if (state != 5)
3466             goto err_label;
3467           state = 6;
3468           lr_ignore_rest (ldfile, 1);
3469           break;
3470
3471         case tok_bsymbol:
3472         case tok_ucs4:
3473           /* Ignore the rest of the line if we don't need the input of
3474              this line.  */
3475           if (ignore_content)
3476             {
3477               lr_ignore_rest (ldfile, 0);
3478               break;
3479             }
3480
3481           if (state != 0 && state != 1 && state != 3 && state != 5)
3482             goto err_label;
3483
3484           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3485             goto err_label;
3486
3487           if (nowtok == tok_ucs4)
3488             {
3489               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3490               symstr = ucs4buf;
3491               symlen = 9;
3492             }
3493           else if (arg != NULL)
3494             {
3495               symstr = arg->val.str.startmb;
3496               symlen = arg->val.str.lenmb;
3497             }
3498           else
3499             {
3500               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3501                         (int) ldfile->token.val.str.lenmb,
3502                         ldfile->token.val.str.startmb);
3503               break;
3504             }
3505
3506           struct element_t *seqp;
3507           if (state == 0)
3508             {
3509               /* We are outside an `order_start' region.  This means
3510                  we must only accept definitions of values for
3511                  collation symbols since these are purely abstract
3512                  values and don't need directions associated.  */
3513               void *ptr;
3514
3515               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3516                 {
3517                   seqp = ptr;
3518
3519                   /* It's already defined.  First check whether this
3520                      is really a collating symbol.  */
3521                   if (seqp->is_character)
3522                     goto err_label;
3523
3524                   goto move_entry;
3525                 }
3526               else
3527                 {
3528                   void *result;
3529
3530                   if (find_entry (&collate->sym_table, symstr, symlen,
3531                                   &result) != 0)
3532                     /* No collating symbol, it's an error.  */
3533                     goto err_label;
3534
3535                   /* Maybe this is the first time we define a symbol
3536                      value and it is before the first actual section.  */
3537                   if (collate->sections == NULL)
3538                     collate->sections = collate->current_section =
3539                       &collate->symbol_section;
3540                 }
3541
3542               if (was_ellipsis != tok_none)
3543                 {
3544                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3545                                    charmap, repertoire, result);
3546
3547                   /* Remember that we processed the ellipsis.  */
3548                   was_ellipsis = tok_none;
3549
3550                   /* And don't add the value a second time.  */
3551                   break;
3552                 }
3553             }
3554           else if (state == 3)
3555             {
3556               /* It is possible that we already have this collation sequence.
3557                  In this case we move the entry.  */
3558               void *sym;
3559               void *ptr;
3560
3561               /* If the symbol after which we have to insert was not found
3562                  ignore all entries.  */
3563               if (collate->cursor == NULL)
3564                 {
3565                   lr_ignore_rest (ldfile, 0);
3566                   break;
3567                 }
3568
3569               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3570                 {
3571                   seqp = (struct element_t *) ptr;
3572                   goto move_entry;
3573                 }
3574
3575               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3576                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3577                 goto move_entry;
3578
3579               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3580                   && (seqp = (struct element_t *) ptr,
3581                       seqp->last != NULL || seqp->next != NULL
3582                       || (collate->start != NULL && seqp == collate->start)))
3583                 {
3584                 move_entry:
3585                   /* Remove the entry from the old position.  */
3586                   if (seqp->last == NULL)
3587                     collate->start = seqp->next;
3588                   else
3589                     seqp->last->next = seqp->next;
3590                   if (seqp->next != NULL)
3591                     seqp->next->last = seqp->last;
3592
3593                   /* We also have to check whether this entry is the
3594                      first or last of a section.  */
3595                   if (seqp->section->first == seqp)
3596                     {
3597                       if (seqp->section->first == seqp->section->last)
3598                         /* This section has no content anymore.  */
3599                         seqp->section->first = seqp->section->last = NULL;
3600                       else
3601                         seqp->section->first = seqp->next;
3602                     }
3603                   else if (seqp->section->last == seqp)
3604                     seqp->section->last = seqp->last;
3605
3606                   /* Now insert it in the new place.  */
3607                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3608                                   tok_none);
3609                   break;
3610                 }
3611
3612               /* Otherwise we just add a new entry.  */
3613             }
3614           else if (state == 5)
3615             {
3616               /* We are reordering sections.  Find the named section.  */
3617               struct section_list *runp = collate->sections;
3618               struct section_list *prevp = NULL;
3619
3620               while (runp != NULL)
3621                 {
3622                   if (runp->name != NULL
3623                       && strlen (runp->name) == symlen
3624                       && memcmp (runp->name, symstr, symlen) == 0)
3625                     break;
3626
3627                   prevp = runp;
3628                   runp = runp->next;
3629                 }
3630
3631               if (runp == NULL)
3632                 {
3633                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3634                             "LC_COLLATE", (int) symlen, symstr);
3635                   lr_ignore_rest (ldfile, 0);
3636                 }
3637               else
3638                 {
3639                   if (runp != collate->current_section)
3640                     {
3641                       /* Remove the named section from the old place and
3642                          insert it in the new one.  */
3643                       prevp->next = runp->next;
3644
3645                       runp->next = collate->current_section->next;
3646                       collate->current_section->next = runp;
3647                       collate->current_section = runp;
3648                     }
3649
3650                   /* Process the rest of the line which might change
3651                      the collation rules.  */
3652                   arg = lr_token (ldfile, charmap, result, repertoire,
3653                                   verbose);
3654                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3655                     read_directions (ldfile, arg, charmap, repertoire,
3656                                      result);
3657                 }
3658               break;
3659             }
3660           else if (was_ellipsis != tok_none)
3661             {
3662               /* Using the information in the `ellipsis_weight'
3663                  element and this and the last value we have to handle
3664                  the ellipsis now.  */
3665               assert (state == 1);
3666
3667               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3668                                repertoire, result);
3669
3670               /* Remember that we processed the ellipsis.  */
3671               was_ellipsis = tok_none;
3672
3673               /* And don't add the value a second time.  */
3674               break;
3675             }
3676
3677           /* Now insert in the new place.  */
3678           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3679           break;
3680
3681         case tok_undefined:
3682           /* Ignore the rest of the line if we don't need the input of
3683              this line.  */
3684           if (ignore_content)
3685             {
3686               lr_ignore_rest (ldfile, 0);
3687               break;
3688             }
3689
3690           if (state != 1)
3691             goto err_label;
3692
3693           if (was_ellipsis != tok_none)
3694             {
3695               lr_error (ldfile,
3696                         _("%s: cannot have `%s' as end of ellipsis range"),
3697                         "LC_COLLATE", "UNDEFINED");
3698
3699               unlink_element (collate);
3700               was_ellipsis = tok_none;
3701             }
3702
3703           /* See whether UNDEFINED already appeared somewhere.  */
3704           if (collate->undefined.next != NULL
3705               || &collate->undefined == collate->cursor)
3706             {
3707               lr_error (ldfile,
3708                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3709                         "LC_COLLATE", 9, "UNDEFINED",
3710                         collate->undefined.file,
3711                         collate->undefined.line);
3712               lr_ignore_rest (ldfile, 0);
3713             }
3714           else
3715             /* Parse the weights.  */
3716              insert_weights (ldfile, &collate->undefined, charmap,
3717                              repertoire, result, tok_none);
3718           break;
3719
3720         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3721         case tok_ellipsis3: /* absolute ellipsis */
3722         case tok_ellipsis4: /* symbolic decimal ellipsis */
3723           /* This is the symbolic (decimal or hexadecimal) or absolute
3724              ellipsis.  */
3725           if (was_ellipsis != tok_none)
3726             goto err_label;
3727
3728           if (state != 0 && state != 1 && state != 3)
3729             goto err_label;
3730
3731           was_ellipsis = nowtok;
3732
3733           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3734                           repertoire, result, nowtok);
3735           break;
3736
3737         case tok_end:
3738         seen_end:
3739           /* Next we assume `LC_COLLATE'.  */
3740           if (!ignore_content)
3741             {
3742               if (state == 0 && copy_locale == NULL)
3743                 /* We must either see a copy statement or have
3744                    ordering values.  */
3745                 lr_error (ldfile,
3746                           _("%s: empty category description not allowed"),
3747                           "LC_COLLATE");
3748               else if (state == 1)
3749                 {
3750                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3751                             "LC_COLLATE");
3752
3753                   /* Handle ellipsis at end of list.  */
3754                   if (was_ellipsis != tok_none)
3755                     {
3756                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3757                                        repertoire, result);
3758                       was_ellipsis = tok_none;
3759                     }
3760                 }
3761               else if (state == 3)
3762                 record_error (0, 0, _("\
3763 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3764               else if (state == 5)
3765                 record_error (0, 0, _("\
3766 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3767             }
3768           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3769           if (arg->tok == tok_eof)
3770             break;
3771           if (arg->tok == tok_eol)
3772             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3773           else if (arg->tok != tok_lc_collate)
3774             lr_error (ldfile, _("\
3775 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3776           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3777           return;
3778
3779         case tok_define:
3780           if (ignore_content)
3781             {
3782               lr_ignore_rest (ldfile, 0);
3783               break;
3784             }
3785
3786           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3787           if (arg->tok != tok_ident)
3788             goto err_label;
3789
3790           /* Simply add the new symbol.  */
3791           struct name_list *newsym = xmalloc (sizeof (*newsym)
3792                                               + arg->val.str.lenmb + 1);
3793           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3794           newsym->str[arg->val.str.lenmb] = '\0';
3795           newsym->next = defined;
3796           defined = newsym;
3797
3798           lr_ignore_rest (ldfile, 1);
3799           break;
3800
3801         case tok_undef:
3802           if (ignore_content)
3803             {
3804               lr_ignore_rest (ldfile, 0);
3805               break;
3806             }
3807
3808           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3809           if (arg->tok != tok_ident)
3810             goto err_label;
3811
3812           /* Remove _all_ occurrences of the symbol from the list.  */
3813           struct name_list *prevdef = NULL;
3814           struct name_list *curdef = defined;
3815           while (curdef != NULL)
3816             if (strncmp (arg->val.str.startmb, curdef->str,
3817                          arg->val.str.lenmb) == 0
3818                 && curdef->str[arg->val.str.lenmb] == '\0')
3819               {
3820                 if (prevdef == NULL)
3821                   defined = curdef->next;
3822                 else
3823                   prevdef->next = curdef->next;
3824
3825                 struct name_list *olddef = curdef;
3826                 curdef = curdef->next;
3827
3828                 free (olddef);
3829               }
3830             else
3831               {
3832                 prevdef = curdef;
3833                 curdef = curdef->next;
3834               }
3835
3836           lr_ignore_rest (ldfile, 1);
3837           break;
3838
3839         case tok_ifdef:
3840         case tok_ifndef:
3841           if (ignore_content)
3842             {
3843               lr_ignore_rest (ldfile, 0);
3844               break;
3845             }
3846
3847         found_ifdef:
3848           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3849           if (arg->tok != tok_ident)
3850             goto err_label;
3851           lr_ignore_rest (ldfile, 1);
3852
3853           if (collate->else_action == else_none)
3854             {
3855               curdef = defined;
3856               while (curdef != NULL)
3857                 if (strncmp (arg->val.str.startmb, curdef->str,
3858                              arg->val.str.lenmb) == 0
3859                     && curdef->str[arg->val.str.lenmb] == '\0')
3860                   break;
3861                 else
3862                   curdef = curdef->next;
3863
3864               if ((nowtok == tok_ifdef && curdef != NULL)
3865                   || (nowtok == tok_ifndef && curdef == NULL))
3866                 {
3867                   /* We have to use the if-branch.  */
3868                   collate->else_action = else_ignore;
3869                 }
3870               else
3871                 {
3872                   /* We have to use the else-branch, if there is one.  */
3873                   nowtok = skip_to (ldfile, collate, charmap, 0);
3874                   if (nowtok == tok_else)
3875                     collate->else_action = else_seen;
3876                   else if (nowtok == tok_elifdef)
3877                     {
3878                       nowtok = tok_ifdef;
3879                       goto found_ifdef;
3880                     }
3881                   else if (nowtok == tok_elifndef)
3882                     {
3883                       nowtok = tok_ifndef;
3884                       goto found_ifdef;
3885                     }
3886                   else if (nowtok == tok_eof)
3887                     goto seen_eof;
3888                   else if (nowtok == tok_end)
3889                     goto seen_end;
3890                 }
3891             }
3892           else
3893             {
3894               /* XXX Should it really become necessary to support nested
3895                  preprocessor handling we will push the state here.  */
3896               lr_error (ldfile, _("%s: nested conditionals not supported"),
3897                         "LC_COLLATE");
3898               nowtok = skip_to (ldfile, collate, charmap, 1);
3899               if (nowtok == tok_eof)
3900                 goto seen_eof;
3901               else if (nowtok == tok_end)
3902                 goto seen_end;
3903             }
3904           break;
3905
3906         case tok_elifdef:
3907         case tok_elifndef:
3908         case tok_else:
3909           if (ignore_content)
3910             {
3911               lr_ignore_rest (ldfile, 0);
3912               break;
3913             }
3914
3915           lr_ignore_rest (ldfile, 1);
3916
3917           if (collate->else_action == else_ignore)
3918             {
3919               /* Ignore everything until the endif.  */
3920               nowtok = skip_to (ldfile, collate, charmap, 1);
3921               if (nowtok == tok_eof)
3922                 goto seen_eof;
3923               else if (nowtok == tok_end)
3924                 goto seen_end;
3925             }
3926           else
3927             {
3928               assert (collate->else_action == else_none);
3929               lr_error (ldfile, _("\
3930 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3931                         nowtok == tok_else ? "else"
3932                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3933             }
3934           break;
3935
3936         case tok_endif:
3937           if (ignore_content)
3938             {
3939               lr_ignore_rest (ldfile, 0);
3940               break;
3941             }
3942
3943           lr_ignore_rest (ldfile, 1);
3944
3945           if (collate->else_action != else_ignore
3946               && collate->else_action != else_seen)
3947             lr_error (ldfile, _("\
3948 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3949
3950           /* XXX If we support nested preprocessor directives we pop
3951              the state here.  */
3952           collate->else_action = else_none;
3953           break;
3954
3955         default:
3956         err_label:
3957           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3958         }
3959
3960       /* Prepare for the next round.  */
3961       now = lr_token (ldfile, charmap, result, NULL, verbose);
3962       nowtok = now->tok;
3963     }
3964
3965  seen_eof:
3966   /* When we come here we reached the end of the file.  */
3967   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3968 }