locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2013 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <errno.h>
  23 #include <error.h>
  24 #include <stdlib.h>
  25 #include <wchar.h>
  26 #include <stdint.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
/* Allocation hooks for the obstack macros used throughout this file:
   the chunks the obstacks are built from are obtained with malloc and
   released with free.  */
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  48   data = maybe_swap_uint32 (data);
  49   if (sizeof (int32_t) == sizeof (int))
  50     obstack_int_grow (obstack, data);
  51   else
  52     obstack_grow (obstack, &data, sizeof (int32_t));
  53 }
  54
  55 static inline void
  56 __attribute ((always_inline))
  57 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  58 {
  59   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  60   data = maybe_swap_uint32 (data);
  61   if (sizeof (int32_t) == sizeof (int))
  62     obstack_int_grow_fast (obstack, data);
  63   else
  64     obstack_grow (obstack, &data, sizeof (int32_t));
  65 }
  66
  67 /* Forward declaration.  */
  68 struct element_t;
  69
/* Data type for the list of sections (scripts) of a collation
   definition.  Every section has its own list of elements and its
   own set of sorting rules.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  read_directions stores an
     array with one entry per sorting level here.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
  88
  89 struct element_t;
  90
/* List of the elements which make up the weight of a collating
   element on one level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  An entry can also be NULL (the level
     is ignored for this element) or the ELEMENT_ELLIPSIS2 placeholder
     stored by insert_weights.  */
  struct element_t **w;
};
  98
/* Data type for collating element.  Objects of this type are created
   by new_element and linked into the order list by insert_weights.  */
struct element_t
{
  /* Symbolic name; NULL for elements created without a name.  */
  const char *name;

  /* Multibyte sequence and its length in bytes (NULL/0 if unknown).  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and its length counted in
     uint32_t units (NULL/0 if unknown).  */
  const uint32_t *wcs;
  size_t nwcs;
  /* NOTE(review): initialized to NULL resp. 0 by new_element and
     filled in by code outside of this part of the file.  */
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array with one weight list per sorting level; allocated by
     insert_weights, NULL before that.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
 148
/* Special element values.  These are markers, not pointers to real
   objects, and must never be dereferenced.  insert_weights stores
   ELEMENT_ELLIPSIS2 in a weight list as the placeholder for a weight
   which depends on the element iterating over an ellipsis range.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 153
/* Data type for collating symbol.  Objects are created by new_symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL as long as no element has
     been associated with the symbol; find_element creates the element
     on demand.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
 166
/* The following three blocks instantiate the generic code in
   "3level.h".  TABLE is the name of the generated table type (used
   below as `struct wchead_table' etc.), ELEMENT the type of the
   stored values and DEFAULT the value used for entries which are not
   set.  NOTE(review): ITERATE and NO_ADD_LOCALE are switches
   interpreted by 3level.h; their exact effect is not visible here.  */

/* Sparse table of struct element_t *.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 186
 187
 188 /* Simple name list for the preprocessor.  */
 189 struct name_list
 190 {
 191   struct name_list *next;
 192   char str[0];
 193 };
 194
 195
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  New elements are assigned to
     this section.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links new elements in directly behind it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* All elements, symbols, names and weight lists are allocated
     from this obstack.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the lists for the wide character
     sequences (the wide character counterpart of MBHEADS).  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
 267
 268
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until read_directions has
   processed the first direction specification; every later
   specification must provide the same number of rules.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
 275
 276
 277 /* We need UTF-8 encoding of numbers.  */
 278 static inline int
 279 __attribute ((always_inline))
 280 utf8_encode (char *buf, int val)
 281 {
 282   int retval;
 283
 284   if (val < 0x80)
 285     {
 286       *buf++ = (char) val;
 287       retval = 1;
 288     }
 289   else
 290     {
 291       int step;
 292
 293       for (step = 2; step < 6; ++step)
 294         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 295           break;
 296       retval = step;
 297
 298       *buf = (unsigned char) (~0xff >> step);
 299       --step;
 300       do
 301         {
 302           buf[step] = 0x80 | (val & 0x3f);
 303           val >>= 6;
 304         }
 305       while (--step > 0);
 306       *buf |= val;
 307     }
 308
 309   return retval;
 310 }
 311
 312
 313 static struct section_list *
 314 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 315                    struct section_list *next)
 316 {
 317   struct section_list *newp;
 318
 319   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 320                                                 sizeof (*newp));
 321   newp->next = next;
 322   newp->name = string;
 323   newp->first = NULL;
 324   newp->last = NULL;
 325
 326   return newp;
 327 }
 328
 329
 330 static struct element_t *
 331 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 332              const uint32_t *wcs, const char *name, size_t namelen,
 333              int is_character)
 334 {
 335   struct element_t *newp;
 336
 337   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 338                                              sizeof (*newp));
 339   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 340                                                     name, namelen);
 341   if (mbs != NULL)
 342     {
 343       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 344       newp->nmbs = mbslen;
 345     }
 346   else
 347     {
 348       newp->mbs = NULL;
 349       newp->nmbs = 0;
 350     }
 351   if (wcs != NULL)
 352     {
 353       size_t nwcs = wcslen ((wchar_t *) wcs);
 354       uint32_t zero = 0;
 355       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 356       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 357       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 358       newp->nwcs = nwcs;
 359     }
 360   else
 361     {
 362       newp->wcs = NULL;
 363       newp->nwcs = 0;
 364     }
 365   newp->mborder = NULL;
 366   newp->wcorder = 0;
 367   newp->used_in_level = 0;
 368   newp->is_character = is_character;
 369
 370   /* Will be assigned later.  XXX  */
 371   newp->mbseqorder = 0;
 372   newp->wcseqorder = 0;
 373
 374   /* Will be allocated later.  */
 375   newp->weights = NULL;
 376
 377   newp->file = NULL;
 378   newp->line = 0;
 379
 380   newp->section = collate->current_section;
 381
 382   newp->last = NULL;
 383   newp->next = NULL;
 384
 385   newp->mbnext = NULL;
 386   newp->mblast = NULL;
 387
 388   newp->wcnext = NULL;
 389   newp->wclast = NULL;
 390
 391   return newp;
 392 }
 393
 394
 395 static struct symbol_t *
 396 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 397 {
 398   struct symbol_t *newp;
 399
 400   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 401
 402   newp->name = obstack_copy0 (&collate->mempool, name, len);
 403   newp->order = NULL;
 404
 405   newp->file = NULL;
 406   newp->line = 0;
 407
 408   return newp;
 409 }
 410
 411
 412 /* Test whether this name is already defined somewhere.  */
 413 static int
 414 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 415                  const struct charmap_t *charmap,
 416                  struct repertoire_t *repertoire, const char *symbol,
 417                  size_t symbol_len)
 418 {
 419   void *ignore = NULL;
 420
 421   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 422     {
 423       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 424                 (int) symbol_len, symbol);
 425       return 1;
 426     }
 427
 428   if (repertoire != NULL
 429       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 430           == 0))
 431     {
 432       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 433                 (int) symbol_len, symbol);
 434       return 1;
 435     }
 436
 437   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 438     {
 439       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 440                 (int) symbol_len, symbol);
 441       return 1;
 442     }
 443
 444   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 445     {
 446       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 447                 (int) symbol_len, symbol);
 448       return 1;
 449     }
 450
 451   return 0;
 452 }
 453
 454
 455 /* Read the direction specification.  */
 456 static void
 457 read_directions (struct linereader *ldfile, struct token *arg,
 458                  const struct charmap_t *charmap,
 459                  struct repertoire_t *repertoire, struct localedef_t *result)
 460 {
 461   int cnt = 0;
 462   int max = nrules ?: 10;
 463   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 464   int warned = 0;
 465   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 466
 467   while (1)
 468     {
 469       int valid = 0;
 470
 471       if (arg->tok == tok_forward)
 472         {
 473           if (rules[cnt] & sort_backward)
 474             {
 475               if (! warned)
 476                 {
 477                   lr_error (ldfile, _("\
 478 %s: `forward' and `backward' are mutually excluding each other"),
 479                             "LC_COLLATE");
 480                   warned = 1;
 481                 }
 482             }
 483           else if (rules[cnt] & sort_forward)
 484             {
 485               if (! warned)
 486                 {
 487                   lr_error (ldfile, _("\
 488 %s: `%s' mentioned more than once in definition of weight %d"),
 489                             "LC_COLLATE", "forward", cnt + 1);
 490                 }
 491             }
 492           else
 493             rules[cnt] |= sort_forward;
 494
 495           valid = 1;
 496         }
 497       else if (arg->tok == tok_backward)
 498         {
 499           if (rules[cnt] & sort_forward)
 500             {
 501               if (! warned)
 502                 {
 503                   lr_error (ldfile, _("\
 504 %s: `forward' and `backward' are mutually excluding each other"),
 505                             "LC_COLLATE");
 506                   warned = 1;
 507                 }
 508             }
 509           else if (rules[cnt] & sort_backward)
 510             {
 511               if (! warned)
 512                 {
 513                   lr_error (ldfile, _("\
 514 %s: `%s' mentioned more than once in definition of weight %d"),
 515                             "LC_COLLATE", "backward", cnt + 1);
 516                 }
 517             }
 518           else
 519             rules[cnt] |= sort_backward;
 520
 521           valid = 1;
 522         }
 523       else if (arg->tok == tok_position)
 524         {
 525           if (rules[cnt] & sort_position)
 526             {
 527               if (! warned)
 528                 {
 529                   lr_error (ldfile, _("\
 530 %s: `%s' mentioned more than once in definition of weight %d"),
 531                             "LC_COLLATE", "position", cnt + 1);
 532                 }
 533             }
 534           else
 535             rules[cnt] |= sort_position;
 536
 537           valid = 1;
 538         }
 539
 540       if (valid)
 541         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 542
 543       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 544           || arg->tok == tok_semicolon)
 545         {
 546           if (! valid && ! warned)
 547             {
 548               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 549               warned = 1;
 550             }
 551
 552           /* See whether we have to increment the counter.  */
 553           if (arg->tok != tok_comma && rules[cnt] != 0)
 554             {
 555               /* Add the default `forward' if we have seen only `position'.  */
 556               if (rules[cnt] == sort_position)
 557                 rules[cnt] = sort_position | sort_forward;
 558
 559               ++cnt;
 560             }
 561
 562           if (arg->tok == tok_eof || arg->tok == tok_eol)
 563             /* End of line or file, so we exit the loop.  */
 564             break;
 565
 566           if (nrules == 0)
 567             {
 568               /* See whether we have enough room in the array.  */
 569               if (cnt == max)
 570                 {
 571                   max += 10;
 572                   rules = (enum coll_sort_rule *) xrealloc (rules,
 573                                                             max
 574                                                             * sizeof (*rules));
 575                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 576                 }
 577             }
 578           else
 579             {
 580               if (cnt == nrules)
 581                 {
 582                   /* There must not be any more rule.  */
 583                   if (! warned)
 584                     {
 585                       lr_error (ldfile, _("\
 586 %s: too many rules; first entry only had %d"),
 587                                 "LC_COLLATE", nrules);
 588                       warned = 1;
 589                     }
 590
 591                   lr_ignore_rest (ldfile, 0);
 592                   break;
 593                 }
 594             }
 595         }
 596       else
 597         {
 598           if (! warned)
 599             {
 600               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 601               warned = 1;
 602             }
 603         }
 604
 605       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 606     }
 607
 608   if (nrules == 0)
 609     {
 610       /* Now we know how many rules we have.  */
 611       nrules = cnt;
 612       rules = (enum coll_sort_rule *) xrealloc (rules,
 613                                                 nrules * sizeof (*rules));
 614     }
 615   else
 616     {
 617       if (cnt < nrules)
 618         {
 619           /* Not enough rules in this specification.  */
 620           if (! warned)
 621             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 622
 623           do
 624             rules[cnt] = sort_forward;
 625           while (++cnt < nrules);
 626         }
 627     }
 628
 629   collate->current_section->rules = rules;
 630 }
 631
 632
 633 static struct element_t *
 634 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 635               const char *str, size_t len)
 636 {
 637   void *result = NULL;
 638
 639   /* Search for the entries among the collation sequences already define.  */
 640   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 641     {
 642       /* Nope, not define yet.  So we see whether it is a
 643          collation symbol.  */
 644       void *ptr;
 645
 646       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 647         {
 648           /* It's a collation symbol.  */
 649           struct symbol_t *sym = (struct symbol_t *) ptr;
 650           result = sym->order;
 651
 652           if (result == NULL)
 653             result = sym->order = new_element (collate, NULL, 0, NULL,
 654                                                NULL, 0, 0);
 655         }
 656       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 657         {
 658           /* It's also no collation element.  So it is a character
 659              element defined later.  */
 660           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 661           /* Insert it into the sequence table.  */
 662           insert_entry (&collate->seq_table, str, len, result);
 663         }
 664     }
 665
 666   return (struct element_t *) result;
 667 }
 668
 669
 670 static void
 671 unlink_element (struct locale_collate_t *collate)
 672 {
 673   if (collate->cursor == collate->start)
 674     {
 675       assert (collate->cursor->next == NULL);
 676       assert (collate->cursor->last == NULL);
 677       collate->cursor = NULL;
 678     }
 679   else
 680     {
 681       if (collate->cursor->next != NULL)
 682         collate->cursor->next->last = collate->cursor->last;
 683       if (collate->cursor->last != NULL)
 684         collate->cursor->last->next = collate->cursor->next;
 685       collate->cursor = collate->cursor->last;
 686     }
 687 }
 688
 689
 690 static void
 691 insert_weights (struct linereader *ldfile, struct element_t *elem,
 692                 const struct charmap_t *charmap,
 693                 struct repertoire_t *repertoire, struct localedef_t *result,
 694                 enum token_t ellipsis)
 695 {
 696   int weight_cnt;
 697   struct token *arg;
 698   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 699
 700   /* Initialize all the fields.  */
 701   elem->file = ldfile->fname;
 702   elem->line = ldfile->lineno;
 703
 704   elem->last = collate->cursor;
 705   elem->next = collate->cursor ? collate->cursor->next : NULL;
 706   if (collate->cursor != NULL && collate->cursor->next != NULL)
 707     collate->cursor->next->last = elem;
 708   if (collate->cursor != NULL)
 709     collate->cursor->next = elem;
 710   if (collate->start == NULL)
 711     {
 712       assert (collate->cursor == NULL);
 713       collate->start = elem;
 714     }
 715
 716   elem->section = collate->current_section;
 717
 718   if (collate->current_section->first == NULL)
 719     collate->current_section->first = elem;
 720   if (collate->current_section->last == collate->cursor)
 721     collate->current_section->last = elem;
 722
 723   collate->cursor = elem;
 724
 725   elem->weights = (struct element_list_t *)
 726     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 727   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 728
 729   weight_cnt = 0;
 730
 731   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 732   do
 733     {
 734       if (arg->tok == tok_eof || arg->tok == tok_eol)
 735         break;
 736
 737       if (arg->tok == tok_ignore)
 738         {
 739           /* The weight for this level has to be ignored.  We use the
 740              null pointer to indicate this.  */
 741           elem->weights[weight_cnt].w = (struct element_t **)
 742             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 743           elem->weights[weight_cnt].w[0] = NULL;
 744           elem->weights[weight_cnt].cnt = 1;
 745         }
 746       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 747         {
 748           char ucs4str[10];
 749           struct element_t *val;
 750           char *symstr;
 751           size_t symlen;
 752
 753           if (arg->tok == tok_bsymbol)
 754             {
 755               symstr = arg->val.str.startmb;
 756               symlen = arg->val.str.lenmb;
 757             }
 758           else
 759             {
 760               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 761               symstr = ucs4str;
 762               symlen = 9;
 763             }
 764
 765           val = find_element (ldfile, collate, symstr, symlen);
 766           if (val == NULL)
 767             break;
 768
 769           elem->weights[weight_cnt].w = (struct element_t **)
 770             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 771           elem->weights[weight_cnt].w[0] = val;
 772           elem->weights[weight_cnt].cnt = 1;
 773         }
 774       else if (arg->tok == tok_string)
 775         {
 776           /* Split the string up in the individual characters and put
 777              the element definitions in the list.  */
 778           const char *cp = arg->val.str.startmb;
 779           int cnt = 0;
 780           struct element_t *charelem;
 781           struct element_t **weights = NULL;
 782           int max = 0;
 783
 784           if (*cp == '\0')
 785             {
 786               lr_error (ldfile, _("%s: empty weight string not allowed"),
 787                         "LC_COLLATE");
 788               lr_ignore_rest (ldfile, 0);
 789               break;
 790             }
 791
 792           do
 793             {
 794               if (*cp == '<')
 795                 {
 796                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 797                      the latter we have to unify the name.  */
 798                   const char *startp = ++cp;
 799                   size_t len;
 800
 801                   while (*cp != '>')
 802                     {
 803                       if (*cp == ldfile->escape_char)
 804                         ++cp;
 805                       if (*cp == '\0')
 806                         /* It's a syntax error.  */
 807                         goto syntax;
 808
 809                       ++cp;
 810                     }
 811
 812                   if (cp - startp == 5 && startp[0] == 'U'
 813                       && isxdigit (startp[1]) && isxdigit (startp[2])
 814                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 815                     {
 816                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 817                       char *newstr;
 818
 819                       newstr = (char *) xmalloc (10);
 820                       snprintf (newstr, 10, "U%08X", ucs4);
 821                       startp = newstr;
 822
 823                       len = 9;
 824                     }
 825                   else
 826                     len = cp - startp;
 827
 828                   charelem = find_element (ldfile, collate, startp, len);
 829                   ++cp;
 830                 }
 831               else
 832                 {
 833                   /* People really shouldn't use characters directly in
 834                      the string.  Especially since it's not really clear
 835                      what this means.  We interpret all characters in the
 836                      string as if that would be bsymbols.  Otherwise we
 837                      would have to match back to bsymbols somehow and this
 838                      is normally not what people normally expect.  */
 839                   charelem = find_element (ldfile, collate, cp++, 1);
 840                 }
 841
 842               if (charelem == NULL)
 843                 {
 844                   /* We ignore the rest of the line.  */
 845                   lr_ignore_rest (ldfile, 0);
 846                   break;
 847                 }
 848
 849               /* Add the pointer.  */
 850               if (cnt >= max)
 851                 {
 852                   struct element_t **newp;
 853                   max += 10;
 854                   newp = (struct element_t **)
 855                     alloca (max * sizeof (struct element_t *));
 856                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 857                   weights = newp;
 858                 }
 859               weights[cnt++] = charelem;
 860             }
 861           while (*cp != '\0');
 862
 863           /* Now store the information.  */
 864           elem->weights[weight_cnt].w = (struct element_t **)
 865             obstack_alloc (&collate->mempool,
 866                            cnt * sizeof (struct element_t *));
 867           memcpy (elem->weights[weight_cnt].w, weights,
 868                   cnt * sizeof (struct element_t *));
 869           elem->weights[weight_cnt].cnt = cnt;
 870
 871           /* We don't need the string anymore.  */
 872           free (arg->val.str.startmb);
 873         }
 874       else if (ellipsis != tok_none
 875                && (arg->tok == tok_ellipsis2
 876                    || arg->tok == tok_ellipsis3
 877                    || arg->tok == tok_ellipsis4))
 878         {
 879           /* It must be the same ellipsis as used in the initial column.  */
 880           if (arg->tok != ellipsis)
 881             lr_error (ldfile, _("\
 882 %s: weights must use the same ellipsis symbol as the name"),
 883                       "LC_COLLATE");
 884
 885           /* The weight for this level will depend on the element
 886              iterating over the range.  Put a placeholder.  */
 887           elem->weights[weight_cnt].w = (struct element_t **)
 888             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 889           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 890           elem->weights[weight_cnt].cnt = 1;
 891         }
 892       else
 893         {
 894         syntax:
 895           /* It's a syntax error.  */
 896           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 897           lr_ignore_rest (ldfile, 0);
 898           break;
 899         }
 900
 901       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 902       /* This better should be the end of the line or a semicolon.  */
 903       if (arg->tok == tok_semicolon)
 904         /* OK, ignore this and read the next token.  */
 905         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 906       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 907         {
 908           /* It's a syntax error.  */
 909           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 910           lr_ignore_rest (ldfile, 0);
 911           break;
 912         }
 913     }
 914   while (++weight_cnt < nrules);
 915
 916   if (weight_cnt < nrules)
 917     {
 918       /* This means the rest of the line uses the current element as
 919          the weight.  */
 920       do
 921         {
 922           elem->weights[weight_cnt].w = (struct element_t **)
 923             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 924           if (ellipsis == tok_none)
 925             elem->weights[weight_cnt].w[0] = elem;
 926           else
 927             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 928           elem->weights[weight_cnt].cnt = 1;
 929         }
 930       while (++weight_cnt < nrules);
 931     }
 932   else
 933     {
 934       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 935         {
 936           /* Too many rule values.  */
 937           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 938           lr_ignore_rest (ldfile, 0);
 939         }
 940       else
 941         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 942     }
 943 }
 944
 945
 946 static int
 947 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 948               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 949               struct localedef_t *result)
 950 {
 951   /* First find out what kind of symbol this is.  */
 952   struct charseq *seq;
 953   uint32_t wc;
 954   struct element_t *elem = NULL;
 955   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 956
 957   /* Try to find the character in the charmap.  */
 958   seq = charmap_find_value (charmap, symstr, symlen);
 959
 960   /* Determine the wide character.  */
 961   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 962     {
 963       wc = repertoire_find_value (repertoire, symstr, symlen);
 964       if (seq != NULL)
 965         seq->ucs4 = wc;
 966     }
 967   else
 968     wc = seq->ucs4;
 969
 970   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 971     {
 972       /* It's no character, so look through the collation elements and
 973          symbol list.  */
 974       void *ptr = elem;
 975       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 976         {
 977           void *result;
 978           struct symbol_t *sym = NULL;
 979
 980           /* It's also collation element.  Therefore it's either a
 981              collating symbol or it's a character which is not
 982              supported by the character set.  In the later case we
 983              simply create a dummy entry.  */
 984           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 985             {
 986               /* It's a collation symbol.  */
 987               sym = (struct symbol_t *) result;
 988
 989               elem = sym->order;
 990             }
 991
 992           if (elem == NULL)
 993             {
 994               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 995
 996               if (sym != NULL)
 997                 sym->order = elem;
 998               else
 999                 /* Enter a fake element in the sequence table.  This
1000                    won't cause anything in the output since there is
1001                    no multibyte or wide character associated with
1002                    it.  */
1003                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1004             }
1005         }
1006       else
1007         /* Copy the result back.  */
1008         elem = ptr;
1009     }
1010   else
1011     {
1012       /* Otherwise the symbols stands for a character.  */
1013       void *ptr = elem;
1014       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1015         {
1016           uint32_t wcs[2] = { wc, 0 };
1017
1018           /* We have to allocate an entry.  */
1019           elem = new_element (collate,
1020                               seq != NULL ? (char *) seq->bytes : NULL,
1021                               seq != NULL ? seq->nbytes : 0,
1022                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1023                               symstr, symlen, 1);
1024
1025           /* And add it to the table.  */
1026           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1027             /* This cannot happen.  */
1028             assert (! "Internal error");
1029         }
1030       else
1031         {
1032           /* Copy the result back.  */
1033           elem = ptr;
1034
1035           /* Maybe the character was used before the definition.  In this case
1036              we have to insert the byte sequences now.  */
1037           if (elem->mbs == NULL && seq != NULL)
1038             {
1039               elem->mbs = obstack_copy0 (&collate->mempool,
1040                                          seq->bytes, seq->nbytes);
1041               elem->nmbs = seq->nbytes;
1042             }
1043
1044           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1045             {
1046               uint32_t wcs[2] = { wc, 0 };
1047
1048               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1049               elem->nwcs = 1;
1050             }
1051         }
1052     }
1053
1054   /* Test whether this element is not already in the list.  */
1055   if (elem->next != NULL || elem == collate->cursor)
1056     {
1057       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1058                 (int) symlen, symstr, elem->file, elem->line);
1059       lr_ignore_rest (ldfile, 0);
1060       return 1;
1061     }
1062
1063   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1064
1065   return 0;
1066 }
1067
1068
1069 static void
1070 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1071                  enum token_t ellipsis, const struct charmap_t *charmap,
1072                  struct repertoire_t *repertoire,
1073                  struct localedef_t *result)
1074 {
1075   struct element_t *startp;
1076   struct element_t *endp;
1077   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1078
1079   /* Unlink the entry added for the ellipsis.  */
1080   unlink_element (collate);
1081   startp = collate->cursor;
1082
1083   /* Process and add the end-entry.  */
1084   if (symstr != NULL
1085       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1086     /* Something went wrong with inserting the to-value.  This means
1087        we cannot process the ellipsis.  */
1088     return;
1089
1090   /* Reset the cursor.  */
1091   collate->cursor = startp;
1092
1093   /* Now we have to handle many different situations:
1094      - we have to distinguish between the three different ellipsis forms
1095      - the is the ellipsis at the beginning, in the middle, or at the end.
1096   */
1097   endp = collate->cursor->next;
1098   assert (symstr == NULL || endp != NULL);
1099
1100   /* XXX The following is probably very wrong since also collating symbols
1101      can appear in ranges.  But do we want/can refine the test for that?  */
1102 #if 0
1103   /* Both, the start and the end symbol, must stand for characters.  */
1104   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1105       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1106     {
1107       lr_error (ldfile, _("\
1108 %s: the start and the end symbol of a range must stand for characters"),
1109                 "LC_COLLATE");
1110       return;
1111     }
1112 #endif
1113
1114   if (ellipsis == tok_ellipsis3)
1115     {
1116       /* One requirement we make here: the length of the byte
1117          sequences for the first and end character must be the same.
1118          This is mainly to prevent unwanted effects and this is often
1119          not what is wanted.  */
1120       size_t len = (startp->mbs != NULL ? startp->nmbs
1121                     : (endp->mbs != NULL ? endp->nmbs : 0));
1122       char mbcnt[len + 1];
1123       char mbend[len + 1];
1124
1125       /* Well, this should be caught somewhere else already.  Just to
1126          make sure.  */
1127       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1128       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1129
1130       if (startp != NULL && endp != NULL
1131           && startp->mbs != NULL && endp->mbs != NULL
1132           && startp->nmbs != endp->nmbs)
1133         {
1134           lr_error (ldfile, _("\
1135 %s: byte sequences of first and last character must have the same length"),
1136                     "LC_COLLATE");
1137           return;
1138         }
1139
1140       /* Determine whether we have to generate multibyte sequences.  */
1141       if ((startp == NULL || startp->mbs != NULL)
1142           && (endp == NULL || endp->mbs != NULL))
1143         {
1144           int cnt;
1145           int ret;
1146
1147           /* Prepare the beginning byte sequence.  This is either from the
1148              beginning byte sequence or it is all nulls if it was an
1149              initial ellipsis.  */
1150           if (startp == NULL || startp->mbs == NULL)
1151             memset (mbcnt, '\0', len);
1152           else
1153             {
1154               memcpy (mbcnt, startp->mbs, len);
1155
1156               /* And increment it so that the value is the first one we will
1157                  try to insert.  */
1158               for (cnt = len - 1; cnt >= 0; --cnt)
1159                 if (++mbcnt[cnt] != '\0')
1160                   break;
1161             }
1162           mbcnt[len] = '\0';
1163
1164           /* And the end sequence.  */
1165           if (endp == NULL || endp->mbs == NULL)
1166             memset (mbend, '\0', len);
1167           else
1168             memcpy (mbend, endp->mbs, len);
1169           mbend[len] = '\0';
1170
1171           /* Test whether we have a correct range.  */
1172           ret = memcmp (mbcnt, mbend, len);
1173           if (ret >= 0)
1174             {
1175               if (ret > 0)
1176                 lr_error (ldfile, _("%s: byte sequence of first character of \
1177 range is not lower than that of the last character"), "LC_COLLATE");
1178               return;
1179             }
1180
1181           /* Generate the byte sequences data.  */
1182           while (1)
1183             {
1184               struct charseq *seq;
1185
1186               /* Quite a bit of work ahead.  We have to find the character
1187                  definition for the byte sequence and then determine the
1188                  wide character belonging to it.  */
1189               seq = charmap_find_symbol (charmap, mbcnt, len);
1190               if (seq != NULL)
1191                 {
1192                   struct element_t *elem;
1193                   size_t namelen;
1194
1195                   /* I don't think this can ever happen.  */
1196                   assert (seq->name != NULL);
1197                   namelen = strlen (seq->name);
1198
1199                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1200                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1201                                                        namelen);
1202
1203                   /* Now we are ready to insert the new value in the
1204                      sequence.  Find out whether the element is
1205                      already known.  */
1206                   void *ptr;
1207                   if (find_entry (&collate->seq_table, seq->name, namelen,
1208                                   &ptr) != 0)
1209                     {
1210                       uint32_t wcs[2] = { seq->ucs4, 0 };
1211
1212                       /* We have to allocate an entry.  */
1213                       elem = new_element (collate, mbcnt, len,
1214                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1215                                           ? NULL : wcs, seq->name,
1216                                           namelen, 1);
1217
1218                       /* And add it to the table.  */
1219                       if (insert_entry (&collate->seq_table, seq->name,
1220                                         namelen, elem) != 0)
1221                         /* This cannot happen.  */
1222                         assert (! "Internal error");
1223                     }
1224                   else
1225                     /* Copy the result.  */
1226                     elem = ptr;
1227
1228                   /* Test whether this element is not already in the list.  */
1229                   if (elem->next != NULL || (collate->cursor != NULL
1230                                              && elem->next == collate->cursor))
1231                     {
1232                       lr_error (ldfile, _("\
1233 order for `%.*s' already defined at %s:%Zu"),
1234                                 (int) namelen, seq->name,
1235                                 elem->file, elem->line);
1236                       goto increment;
1237                     }
1238
1239                   /* Enqueue the new element.  */
1240                   elem->last = collate->cursor;
1241                   if (collate->cursor == NULL)
1242                     elem->next = NULL;
1243                   else
1244                     {
1245                       elem->next = collate->cursor->next;
1246                       elem->last->next = elem;
1247                       if (elem->next != NULL)
1248                         elem->next->last = elem;
1249                     }
1250                   if (collate->start == NULL)
1251                     {
1252                       assert (collate->cursor == NULL);
1253                       collate->start = elem;
1254                     }
1255                   collate->cursor = elem;
1256
1257                  /* Add the weight value.  We take them from the
1258                     `ellipsis_weights' member of `collate'.  */
1259                   elem->weights = (struct element_list_t *)
1260                     obstack_alloc (&collate->mempool,
1261                                    nrules * sizeof (struct element_list_t));
1262                   for (cnt = 0; cnt < nrules; ++cnt)
1263                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1264                         && (collate->ellipsis_weight.weights[cnt].w[0]
1265                             == ELEMENT_ELLIPSIS2))
1266                       {
1267                         elem->weights[cnt].w = (struct element_t **)
1268                           obstack_alloc (&collate->mempool,
1269                                          sizeof (struct element_t *));
1270                         elem->weights[cnt].w[0] = elem;
1271                         elem->weights[cnt].cnt = 1;
1272                       }
1273                     else
1274                       {
1275                         /* Simply use the weight from `ellipsis_weight'.  */
1276                         elem->weights[cnt].w =
1277                           collate->ellipsis_weight.weights[cnt].w;
1278                         elem->weights[cnt].cnt =
1279                           collate->ellipsis_weight.weights[cnt].cnt;
1280                       }
1281                 }
1282
1283               /* Increment for the next round.  */
1284             increment:
1285               for (cnt = len - 1; cnt >= 0; --cnt)
1286                 if (++mbcnt[cnt] != '\0')
1287                   break;
1288
1289               /* Find out whether this was all.  */
1290               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1291                 /* Yep, that's all.  */
1292                 break;
1293             }
1294         }
1295     }
1296   else
1297     {
1298       /* For symbolic range we naturally must have a beginning and an
1299          end specified by the user.  */
1300       if (startp == NULL)
1301         lr_error (ldfile, _("\
1302 %s: symbolic range ellipsis must not directly follow `order_start'"),
1303                   "LC_COLLATE");
1304       else if (endp == NULL)
1305         lr_error (ldfile, _("\
1306 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1307                   "LC_COLLATE");
1308       else
1309         {
1310           /* Determine the range.  To do so we have to determine the
1311              common prefix of the both names and then the numeric
1312              values of both ends.  */
1313           size_t lenfrom = strlen (startp->name);
1314           size_t lento = strlen (endp->name);
1315           char buf[lento + 1];
1316           int preflen = 0;
1317           long int from;
1318           long int to;
1319           char *cp;
1320           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1321
1322           if (lenfrom != lento)
1323             {
1324             invalid_range:
1325               lr_error (ldfile, _("\
1326 `%s' and `%.*s' are not valid names for symbolic range"),
1327                         startp->name, (int) lento, endp->name);
1328               return;
1329             }
1330
1331           while (startp->name[preflen] == endp->name[preflen])
1332             if (startp->name[preflen] == '\0')
1333               /* Nothing to be done.  The start and end point are identical
1334                  and while inserting the end point we have already given
1335                  the user an error message.  */
1336               return;
1337             else
1338               ++preflen;
1339
1340           errno = 0;
1341           from = strtol (startp->name + preflen, &cp, base);
1342           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1343             goto invalid_range;
1344
1345           errno = 0;
1346           to = strtol (endp->name + preflen, &cp, base);
1347           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1348             goto invalid_range;
1349
1350           /* Copy the prefix.  */
1351           memcpy (buf, startp->name, preflen);
1352
1353           /* Loop over all values.  */
1354           for (++from; from < to; ++from)
1355             {
1356               struct element_t *elem = NULL;
1357               struct charseq *seq;
1358               uint32_t wc;
1359               int cnt;
1360
1361               /* Generate the name.  */
1362               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1363                        (int) (lenfrom - preflen), from);
1364
1365               /* Look whether this name is already defined.  */
1366               void *ptr;
1367               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1368                 {
1369                   /* Copy back the result.  */
1370                   elem = ptr;
1371
1372                   if (elem->next != NULL || (collate->cursor != NULL
1373                                              && elem->next == collate->cursor))
1374                     {
1375                       lr_error (ldfile, _("\
1376 %s: order for `%.*s' already defined at %s:%Zu"),
1377                                 "LC_COLLATE", (int) lenfrom, buf,
1378                                 elem->file, elem->line);
1379                       continue;
1380                     }
1381
1382                   if (elem->name == NULL)
1383                     {
1384                       lr_error (ldfile, _("%s: `%s' must be a character"),
1385                                 "LC_COLLATE", buf);
1386                       continue;
1387                     }
1388                 }
1389
1390               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1391                 {
1392                   /* Search for a character of this name.  */
1393                   seq = charmap_find_value (charmap, buf, lenfrom);
1394                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1395                     {
1396                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1397
1398                       if (seq != NULL)
1399                         seq->ucs4 = wc;
1400                     }
1401                   else
1402                     wc = seq->ucs4;
1403
1404                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1405                     /* We don't know anything about a character with this
1406                        name.  XXX Should we warn?  */
1407                     continue;
1408
1409                   if (elem == NULL)
1410                     {
1411                       uint32_t wcs[2] = { wc, 0 };
1412
1413                       /* We have to allocate an entry.  */
1414                       elem = new_element (collate,
1415                                           seq != NULL
1416                                           ? (char *) seq->bytes : NULL,
1417                                           seq != NULL ? seq->nbytes : 0,
1418                                           wc == ILLEGAL_CHAR_VALUE
1419                                           ? NULL : wcs, buf, lenfrom, 1);
1420                     }
1421                   else
1422                     {
1423                       /* Update the element.  */
1424                       if (seq != NULL)
1425                         {
1426                           elem->mbs = obstack_copy0 (&collate->mempool,
1427                                                      seq->bytes, seq->nbytes);
1428                           elem->nmbs = seq->nbytes;
1429                         }
1430
1431                       if (wc != ILLEGAL_CHAR_VALUE)
1432                         {
1433                           uint32_t zero = 0;
1434
1435                           obstack_grow (&collate->mempool,
1436                                         &wc, sizeof (uint32_t));
1437                           obstack_grow (&collate->mempool,
1438                                         &zero, sizeof (uint32_t));
1439                           elem->wcs = obstack_finish (&collate->mempool);
1440                           elem->nwcs = 1;
1441                         }
1442                     }
1443
1444                   elem->file = ldfile->fname;
1445                   elem->line = ldfile->lineno;
1446                   elem->section = collate->current_section;
1447                 }
1448
1449               /* Enqueue the new element.  */
1450               elem->last = collate->cursor;
1451               elem->next = collate->cursor->next;
1452               elem->last->next = elem;
1453               if (elem->next != NULL)
1454                 elem->next->last = elem;
1455               collate->cursor = elem;
1456
1457               /* Now add the weights.  They come from the `ellipsis_weights'
1458                  member of `collate'.  */
1459               elem->weights = (struct element_list_t *)
1460                 obstack_alloc (&collate->mempool,
1461                                nrules * sizeof (struct element_list_t));
1462               for (cnt = 0; cnt < nrules; ++cnt)
1463                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1464                     && (collate->ellipsis_weight.weights[cnt].w[0]
1465                         == ELEMENT_ELLIPSIS2))
1466                   {
1467                     elem->weights[cnt].w = (struct element_t **)
1468                       obstack_alloc (&collate->mempool,
1469                                      sizeof (struct element_t *));
1470                     elem->weights[cnt].w[0] = elem;
1471                     elem->weights[cnt].cnt = 1;
1472                   }
1473                 else
1474                   {
1475                     /* Simly use the weight from `ellipsis_weight'.  */
1476                     elem->weights[cnt].w =
1477                       collate->ellipsis_weight.weights[cnt].w;
1478                     elem->weights[cnt].cnt =
1479                       collate->ellipsis_weight.weights[cnt].cnt;
1480                   }
1481             }
1482         }
1483     }
1484 }
1485
1486
1487 static void
1488 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1489                  struct localedef_t *copy_locale, int ignore_content)
1490 {
1491   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1492     {
1493       struct locale_collate_t *collate;
1494
1495       if (copy_locale == NULL)
1496         {
1497           collate = locale->categories[LC_COLLATE].collate =
1498             (struct locale_collate_t *)
1499             xcalloc (1, sizeof (struct locale_collate_t));
1500
1501           /* Init the various data structures.  */
1502           init_hash (&collate->elem_table, 100);
1503           init_hash (&collate->sym_table, 100);
1504           init_hash (&collate->seq_table, 500);
1505           obstack_init (&collate->mempool);
1506
1507           collate->col_weight_max = -1;
1508         }
1509       else
1510         /* Reuse the copy_locale's data structures.  */
1511         collate = locale->categories[LC_COLLATE].collate =
1512           copy_locale->categories[LC_COLLATE].collate;
1513     }
1514
1515   ldfile->translate_strings = 0;
1516   ldfile->return_widestr = 0;
1517 }
1518
1519
1520 void
1521 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1522 {
1523   /* Now is the time when we can assign the individual collation
1524      values for all the symbols.  We have possibly different values
1525      for the wide- and the multibyte-character symbols.  This is done
1526      since it might make a difference in the encoding if there is in
1527      some cases no multibyte-character but there are wide-characters.
1528      (The other way around it is not important since theencoded
1529      collation value in the wide-character case is 32 bits wide and
1530      therefore requires no encoding).
1531
1532      The lowest collation value assigned is 2.  Zero is reserved for
1533      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1534      functions and 1 is used to separate the individual passes for the
1535      different rules.
1536
1537      We also have to construct is list with all the bytes/words which
1538      can come first in a sequence, followed by all the elements which
1539      also start with this byte/word.  The order is reverse which has
1540      among others the important effect that longer strings are located
1541      first in the list.  This is required for the output data since
1542      the algorithm used in `strcoll' etc depends on this.
1543
1544      The multibyte case is easy.  We simply sort into an array with
1545      256 elements.  */
1546   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1547   int mbact[nrules];
1548   int wcact;
1549   int mbseqact;
1550   int wcseqact;
1551   struct element_t *runp;
1552   int i;
1553   int need_undefined = 0;
1554   struct section_list *sect;
1555   int ruleidx;
1556   int nr_wide_elems = 0;
1557
1558   if (collate == NULL)
1559     {
1560       /* No data, no check.  */
1561       if (! be_quiet)
1562         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1563                                 "LC_COLLATE"));
1564       return;
1565     }
1566
1567   /* If this assertion is hit change the type in `element_t'.  */
1568   assert (nrules <= sizeof (runp->used_in_level) * 8);
1569
1570   /* Make sure that the `position' rule is used either in all sections
1571      or in none.  */
1572   for (i = 0; i < nrules; ++i)
1573     for (sect = collate->sections; sect != NULL; sect = sect->next)
1574       if (sect != collate->current_section
1575           && sect->rules != NULL
1576           && ((sect->rules[i] & sort_position)
1577               != (collate->current_section->rules[i] & sort_position)))
1578         {
1579           WITH_CUR_LOCALE (error (0, 0, _("\
1580 %s: `position' must be used for a specific level in all sections or none"),
1581                                   "LC_COLLATE"));
1582           break;
1583         }
1584
1585   /* Find out which elements are used at which level.  At the same
1586      time we find out whether we have any undefined symbols.  */
1587   runp = collate->start;
1588   while (runp != NULL)
1589     {
1590       if (runp->mbs != NULL)
1591         {
1592           for (i = 0; i < nrules; ++i)
1593             {
1594               int j;
1595
1596               for (j = 0; j < runp->weights[i].cnt; ++j)
1597                 /* A NULL pointer as the weight means IGNORE.  */
1598                 if (runp->weights[i].w[j] != NULL)
1599                   {
1600                     if (runp->weights[i].w[j]->weights == NULL)
1601                       {
1602                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1603                                                         runp->line,
1604                                                         _("symbol `%s' not defined"),
1605                                                         runp->weights[i].w[j]->name));
1606
1607                         need_undefined = 1;
1608                         runp->weights[i].w[j] = &collate->undefined;
1609                       }
1610                     else
1611                       /* Set the bit for the level.  */
1612                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1613                   }
1614             }
1615         }
1616
1617       /* Up to the next entry.  */
1618       runp = runp->next;
1619     }
1620
1621   /* Walk through the list of defined sequences and assign weights.  Also
1622      create the data structure which will allow generating the single byte
1623      character based tables.
1624
1625      Since at each time only the weights for each of the rules are
1626      only compared to other weights for this rule it is possible to
1627      assign more compact weight values than simply counting all
1628      weights in sequence.  We can assign weights from 3, one for each
1629      rule individually and only for those elements, which are actually
1630      used for this rule.
1631
1632      Why is this important?  It is not for the wide char table.  But
1633      it is for the singlebyte output since here larger numbers have to
1634      be encoded to make it possible to emit the value as a byte
1635      string.  */
1636   for (i = 0; i < nrules; ++i)
1637     mbact[i] = 2;
1638   wcact = 2;
1639   mbseqact = 0;
1640   wcseqact = 0;
1641   runp = collate->start;
1642   while (runp != NULL)
1643     {
1644       /* Determine the order.  */
1645       if (runp->used_in_level != 0)
1646         {
1647           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1648                                                  nrules * sizeof (int));
1649
1650           for (i = 0; i < nrules; ++i)
1651             if ((runp->used_in_level & (1 << i)) != 0)
1652               runp->mborder[i] = mbact[i]++;
1653             else
1654               runp->mborder[i] = 0;
1655         }
1656
1657       if (runp->mbs != NULL)
1658         {
1659           struct element_t **eptr;
1660           struct element_t *lastp = NULL;
1661
1662           /* Find the point where to insert in the list.  */
1663           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1664           while (*eptr != NULL)
1665             {
1666               if ((*eptr)->nmbs < runp->nmbs)
1667                 break;
1668
1669               if ((*eptr)->nmbs == runp->nmbs)
1670                 {
1671                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1672
1673                   if (c == 0)
1674                     {
1675                       /* This should not happen.  It means that we have
1676                          to symbols with the same byte sequence.  It is
1677                          of course an error.  */
1678                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1679                                                       (*eptr)->line,
1680                                                       _("\
1681 symbol `%s' has the same encoding as"), (*eptr)->name);
1682                                        error_at_line (0, 0, runp->file,
1683                                                       runp->line,
1684                                                       _("symbol `%s'"),
1685                                                       runp->name));
1686                       goto dont_insert;
1687                     }
1688                   else if (c < 0)
1689                     /* Insert it here.  */
1690                     break;
1691                 }
1692
1693               /* To the next entry.  */
1694               lastp = *eptr;
1695               eptr = &(*eptr)->mbnext;
1696             }
1697
1698           /* Set the pointers.  */
1699           runp->mbnext = *eptr;
1700           runp->mblast = lastp;
1701           if (*eptr != NULL)
1702             (*eptr)->mblast = runp;
1703           *eptr = runp;
1704         dont_insert:
1705           ;
1706         }
1707
1708       if (runp->used_in_level)
1709         {
1710           runp->wcorder = wcact++;
1711
1712           /* We take the opportunity to count the elements which have
1713              wide characters.  */
1714           ++nr_wide_elems;
1715         }
1716
1717       if (runp->is_character)
1718         {
1719           if (runp->nmbs == 1)
1720             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1721
1722           runp->wcseqorder = wcseqact++;
1723         }
1724       else if (runp->mbs != NULL && runp->weights != NULL)
1725         /* This is for collation elements.  */
1726         runp->wcseqorder = wcseqact++;
1727
1728       /* Up to the next entry.  */
1729       runp = runp->next;
1730     }
1731
1732   /* Find out whether any of the `mbheads' entries is unset.  In this
1733      case we use the UNDEFINED entry.  */
1734   for (i = 1; i < 256; ++i)
1735     if (collate->mbheads[i] == NULL)
1736       {
1737         need_undefined = 1;
1738         collate->mbheads[i] = &collate->undefined;
1739       }
1740
1741   /* Now to the wide character case.  */
1742   collate->wcheads.p = 6;
1743   collate->wcheads.q = 10;
1744   wchead_table_init (&collate->wcheads);
1745
1746   collate->wcseqorder.p = 6;
1747   collate->wcseqorder.q = 10;
1748   collseq_table_init (&collate->wcseqorder);
1749
1750   /* Start adding.  */
1751   runp = collate->start;
1752   while (runp != NULL)
1753     {
1754       if (runp->wcs != NULL)
1755         {
1756           struct element_t *e;
1757           struct element_t **eptr;
1758           struct element_t *lastp;
1759
1760           /* Insert the collation sequence value.  */
1761           if (runp->is_character)
1762             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1763                                runp->wcseqorder);
1764
1765           /* Find the point where to insert in the list.  */
1766           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1767           eptr = &e;
1768           lastp = NULL;
1769           while (*eptr != NULL)
1770             {
1771               if ((*eptr)->nwcs < runp->nwcs)
1772                 break;
1773
1774               if ((*eptr)->nwcs == runp->nwcs)
1775                 {
1776                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1777                                    (wchar_t *) runp->wcs, runp->nwcs);
1778
1779                   if (c == 0)
1780                     {
1781                       /* This should not happen.  It means that we have
1782                          two symbols with the same byte sequence.  It is
1783                          of course an error.  */
1784                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1785                                                       (*eptr)->line,
1786                                                       _("\
1787 symbol `%s' has the same encoding as"), (*eptr)->name);
1788                                        error_at_line (0, 0, runp->file,
1789                                                       runp->line,
1790                                                       _("symbol `%s'"),
1791                                                       runp->name));
1792                       goto dont_insertwc;
1793                     }
1794                   else if (c < 0)
1795                     /* Insert it here.  */
1796                     break;
1797                 }
1798
1799               /* To the next entry.  */
1800               lastp = *eptr;
1801               eptr = &(*eptr)->wcnext;
1802             }
1803
1804           /* Set the pointers.  */
1805           runp->wcnext = *eptr;
1806           runp->wclast = lastp;
1807           if (*eptr != NULL)
1808             (*eptr)->wclast = runp;
1809           *eptr = runp;
1810           if (eptr == &e)
1811             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1812         dont_insertwc:
1813           ;
1814         }
1815
1816       /* Up to the next entry.  */
1817       runp = runp->next;
1818     }
1819
1820   /* Now determine whether the UNDEFINED entry is needed and if yes,
1821      whether it was defined.  */
1822   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1823   if (collate->undefined.file == NULL)
1824     {
1825       if (need_undefined)
1826         {
1827           /* This seems not to be enforced by recent standards.  Don't
1828              emit an error, simply append UNDEFINED at the end.  */
1829           if (0)
1830             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1831
1832           /* Add UNDEFINED at the end.  */
1833           collate->undefined.mborder =
1834             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1835
1836           for (i = 0; i < nrules; ++i)
1837             collate->undefined.mborder[i] = mbact[i]++;
1838         }
1839
1840       /* In any case we will need the definition for the wide character
1841          case.  But we will not complain that it is missing since the
1842          specification strangely enough does not seem to account for
1843          this.  */
1844       collate->undefined.wcorder = wcact++;
1845     }
1846
1847   /* Finally, try to unify the rules for the sections.  Whenever the rules
1848      for a section are the same as those for another section give the
1849      ruleset the same index.  Since there are never many section we can
1850      use an O(n^2) algorithm here.  */
1851   sect = collate->sections;
1852   while (sect != NULL && sect->rules == NULL)
1853     sect = sect->next;
1854
1855   /* Bail out if we have no sections because of earlier errors.  */
1856   if (sect == NULL)
1857     {
1858       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1859                               _("too many errors; giving up")));
1860       return;
1861     }
1862
1863   ruleidx = 0;
1864   do
1865     {
1866       struct section_list *osect = collate->sections;
1867
1868       while (osect != sect)
1869         if (osect->rules != NULL
1870             && memcmp (osect->rules, sect->rules,
1871                        nrules * sizeof (osect->rules[0])) == 0)
1872           break;
1873         else
1874           osect = osect->next;
1875
1876       if (osect == sect)
1877         sect->ruleidx = ruleidx++;
1878       else
1879         sect->ruleidx = osect->ruleidx;
1880
1881       /* Next section.  */
1882       do
1883         sect = sect->next;
1884       while (sect != NULL && sect->rules == NULL);
1885     }
1886   while (sect != NULL);
1887   /* We are currently not prepared for more than 128 rulesets.  But this
1888      should never really be a problem.  */
1889   assert (ruleidx <= 128);
1890 }
1891
1892
1893 static int32_t
1894 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1895                struct element_t *elem)
1896 {
1897   size_t cnt;
1898   int32_t retval;
1899
1900   /* Optimize the use of UNDEFINED.  */
1901   if (elem == &collate->undefined)
1902     /* The weights are already inserted.  */
1903     return 0;
1904
1905   /* This byte can start exactly one collation element and this is
1906      a single byte.  We can directly give the index to the weights.  */
1907   retval = obstack_object_size (pool);
1908
1909   /* Construct the weight.  */
1910   for (cnt = 0; cnt < nrules; ++cnt)
1911     {
1912       char buf[elem->weights[cnt].cnt * 7];
1913       int len = 0;
1914       int i;
1915
1916       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1917         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1918         if (elem->weights[cnt].w[i] != NULL)
1919           len += utf8_encode (&buf[len],
1920                               elem->weights[cnt].w[i]->mborder[cnt]);
1921
1922       /* And add the buffer content.  */
1923       obstack_1grow (pool, len);
1924       obstack_grow (pool, buf, len);
1925     }
1926
1927   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1928 }
1929
1930
1931 static int32_t
1932 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1933                  struct element_t *elem)
1934 {
1935   size_t cnt;
1936   int32_t retval;
1937
1938   /* Optimize the use of UNDEFINED.  */
1939   if (elem == &collate->undefined)
1940     /* The weights are already inserted.  */
1941     return 0;
1942
1943   /* This byte can start exactly one collation element and this is
1944      a single byte.  We can directly give the index to the weights.  */
1945   retval = obstack_object_size (pool) / sizeof (int32_t);
1946
1947   /* Construct the weight.  */
1948   for (cnt = 0; cnt < nrules; ++cnt)
1949     {
1950       int32_t buf[elem->weights[cnt].cnt];
1951       int i;
1952       int32_t j;
1953
1954       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1955         if (elem->weights[cnt].w[i] != NULL)
1956           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1957
1958       /* And add the buffer content.  */
1959       obstack_int32_grow (pool, j);
1960
1961       obstack_grow (pool, buf, j * sizeof (int32_t));
1962       maybe_swap_uint32_obstack (pool, j);
1963     }
1964
1965   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1966 }
1967
1968 /* Context for add_to_tablewc.  That function is handed to
     wchead_table_iterate and receives only a character and a list head,
     so the pools and tables it writes to are taken from this variable.
     collate_output fills it in before iterating and zeroes it afterwards.
     If localedef is ever threaded, this would need to be a __thread
     variable.  */
1969 static struct
1970 {
1971   struct obstack *weightpool;   /* Pool passed to output_weightwc.  */
1972   struct obstack *extrapool;    /* Receives the per-character sequence records.  */
1973   struct obstack *indpool;      /* Receives weight indices of compressed runs.  */
1974   struct locale_collate_t *collate;     /* Passed to output_weightwc.  */
1975   struct collidx_table *tablewc;        /* Table filled via collidx_table_add.  */
1976 } atwc;
1977
1978 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1979
1980 static void
1981 add_to_tablewc (uint32_t ch, struct element_t *runp)
1982 {
1983   if (runp->wcnext == NULL && runp->nwcs == 1)
1984     {
1985       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1986                                            runp);
1987       collidx_table_add (atwc.tablewc, ch, weigthidx);
1988     }
1989   else
1990     {
1991       /* As for the singlebyte table, we recognize sequences and
1992          compress them.  */
1993
1994       collidx_table_add (atwc.tablewc, ch,
1995                          -(obstack_object_size (atwc.extrapool)
1996                          / sizeof (uint32_t)));
1997
1998       do
1999         {
2000           /* Store the current index in the weight table.  We know that
2001              the current position in the `extrapool' is aligned on a
2002              32-bit address.  */
2003           int32_t weightidx;
2004           int added;
2005
2006           /* Find out wether this is a single entry or we have more than
2007              one consecutive entry.  */
2008           if (runp->wcnext != NULL
2009               && runp->nwcs == runp->wcnext->nwcs
2010               && wmemcmp ((wchar_t *) runp->wcs,
2011                           (wchar_t *)runp->wcnext->wcs,
2012                           runp->nwcs - 1) == 0
2013               && (runp->wcs[runp->nwcs - 1]
2014                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2015             {
2016               int i;
2017               struct element_t *series_startp = runp;
2018               struct element_t *curp;
2019
2020               /* Now add first the initial byte sequence.  */
2021               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2022               if (sizeof (int32_t) == sizeof (int))
2023                 obstack_make_room (atwc.extrapool, added);
2024
2025               /* More than one consecutive entry.  We mark this by having
2026                  a negative index into the indirect table.  */
2027               obstack_int32_grow_fast (atwc.extrapool,
2028                                        -(obstack_object_size (atwc.indpool)
2029                                          / sizeof (int32_t)));
2030               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2031
2032               do
2033                 runp = runp->wcnext;
2034               while (runp->wcnext != NULL
2035                      && runp->nwcs == runp->wcnext->nwcs
2036                      && wmemcmp ((wchar_t *) runp->wcs,
2037                                  (wchar_t *)runp->wcnext->wcs,
2038                                  runp->nwcs - 1) == 0
2039                      && (runp->wcs[runp->nwcs - 1]
2040                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2041
2042               /* Now walk backward from here to the beginning.  */
2043               curp = runp;
2044
2045               for (i = 1; i < runp->nwcs; ++i)
2046                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2047
2048               /* Now find the end of the consecutive sequence and
2049                  add all the indeces in the indirect pool.  */
2050               do
2051                 {
2052                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2053                                                curp);
2054                   obstack_int32_grow (atwc.indpool, weightidx);
2055
2056                   curp = curp->wclast;
2057                 }
2058               while (curp != series_startp);
2059
2060               /* Add the final weight.  */
2061               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2062                                            curp);
2063               obstack_int32_grow (atwc.indpool, weightidx);
2064
2065               /* And add the end byte sequence.  Without length this
2066                  time.  */
2067               for (i = 1; i < curp->nwcs; ++i)
2068                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2069             }
2070           else
2071             {
2072               /* A single entry.  Simply add the index and the length and
2073                  string (except for the first character which is already
2074                  tested for).  */
2075               int i;
2076
2077               /* Output the weight info.  */
2078               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2079                                            runp);
2080
2081               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2082               if (sizeof (int) == sizeof (int32_t))
2083                 obstack_make_room (atwc.extrapool, added);
2084
2085               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2086               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2087               for (i = 1; i < runp->nwcs; ++i)
2088                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2089             }
2090
2091           /* Next entry.  */
2092           runp = runp->wcnext;
2093         }
2094       while (runp != NULL);
2095     }
2096 }
2097
/* Write the LC_COLLATE category data of LOCALE to OUTPUT_PATH.

   The items are added to a `struct locale_file' in this order: the
   number of rules; the ruleset table; the 256-entry multibyte index
   table followed by the weight, extra and indirect pools; three empty
   items; the wide-character index table followed by the same three
   pools; the collation element hash table (size, table, payload); the
   `mbseqorder' array; the `wcseqorder' table; and the code set name
   taken from CHARMAP.

   If LOCALE has no collation data only the rule count is written,
   followed by empty items (a zero word for
   _NL_COLLATE_SYMB_HASH_SIZEMB).

   The three obstacks are reused from stage to stage.
   NOTE(review): this presumably relies on add_locale_raw_obstack
   finishing the current object so that the next stage starts with an
   empty one -- confirm in locfile.c.  */
2098 void
2099 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2100                 const char *output_path)
2101 {
2102   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2103   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2104   struct locale_file file;
2105   size_t ch;
2106   int32_t tablemb[256];
2107   struct obstack weightpool;
2108   struct obstack extrapool;
2109   struct obstack indirectpool;
2110   struct section_list *sect;
2111   struct collidx_table tablewc;
2112   uint32_t elem_size;
2113   uint32_t *elem_table;
2114   int i;
2115   struct element_t *runp;
2116
2117   init_locale_data (&file, nelems);
2118   add_locale_uint32 (&file, nrules);
2119
2120   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2121   if (collate == NULL)
2122     {
2123       size_t idx;
2124       for (idx = 1; idx < nelems; idx++)
2125         {
2126           /* The words have to be handled specially.  */
2127           if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2128             add_locale_uint32 (&file, 0);
2129           else
2130             add_locale_empty (&file);
2131         }
2132       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2133       return;
2134     }
2135
2136   obstack_init (&weightpool);
2137   obstack_init (&extrapool);
2138   obstack_init (&indirectpool);
2139
2140   /* Since we are using the sign of an integer to mark indirection the
2141      offsets in the arrays we are indirectly referring to must not be
2142      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2143   obstack_int32_grow (&extrapool, 0);
2144   obstack_int32_grow (&indirectpool, 0);
2145
2146   /* Prepare the ruleset table.  Only the first section seen for each
     rule index is written, NRULES bytes per ruleset.  */
2147   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2148     if (sect->rules != NULL && sect->ruleidx == i)
2149       {
2150         int j;
2151
2152         obstack_make_room (&weightpool, nrules);
2153
2154         for (j = 0; j < nrules; ++j)
2155           obstack_1grow_fast (&weightpool, sect->rules[j]);
2156         ++i;
2157       }
2158   /* And align the output.  */
2159   i = (nrules * i) % LOCFILE_ALIGN;
2160   if (i > 0)
2161     do
2162       obstack_1grow (&weightpool, '\0');
2163     while (++i < LOCFILE_ALIGN);
2164
2165   add_locale_raw_obstack (&file, &weightpool);
2166
2167   /* Generate the 8-bit table.  Walk through the lists of sequences
2168      starting with the same byte and add them one after the other to
2169      the table.  In case we have more than one sequence starting with
2170      the same byte we have to use extra indirection.
2171
2172      First add a record for the NUL byte.  This entry will never be used
2173      so it does not matter.  */
2174   tablemb[0] = 0;
2175
2176   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2177      will probably be used more than once it is good to store the
2178      weights only once.
     NOTE(review): output_weight returns 0 right away when it is passed
     &collate->undefined, so this call does not add anything to the
     pool.  */
2179   if (collate->undefined.used_in_level != 0)
2180     output_weight (&weightpool, collate, &collate->undefined);
2181
2182   for (ch = 1; ch < 256; ++ch)
2183     if (collate->mbheads[ch]->mbnext == NULL
2184         && collate->mbheads[ch]->nmbs <= 1)
2185       {
2186         tablemb[ch] = output_weight (&weightpool, collate,
2187                                      collate->mbheads[ch]);
2188       }
2189     else
2190       {
2191         /* The entries in the list are sorted by length and then
2192            alphabetically.  This is the order in which we will add the
2193            elements to the collation table.  This allows simply walking
2194            the table in sequence and stopping at the first matching
2195            entry.  Since the longer sequences are coming first in the
2196            list they have the possibility to match first, just as it
2197            has to be.  In the worst case we are walking to the end of
2198            the list where we put, if no singlebyte sequence is defined
2199            in the locale definition, the weights for UNDEFINED.
2200
2201            To reduce the length of the search list we compress them a bit.
2202            This happens by collecting sequences of consecutive byte
2203            sequences in one entry (having a begin and end byte sequence)
2204            and add only one index into the weight table.  We can find the
2205            consecutive entries since they are also consecutive in the list.  */
2206         struct element_t *runp = collate->mbheads[ch];
2207         struct element_t *lastp;
2208
2209         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2210
         /* A negative table entry is the negated byte offset of the
            first record for this byte in the extra pool.  */
2211         tablemb[ch] = -obstack_object_size (&extrapool);
2212
2213         do
2214           {
2215             /* Store the current index in the weight table.  We know that
2216                the current position in the `extrapool' is aligned on a
2217                32-bit address.  */
2218             int32_t weightidx;
2219             int added;
2220
2221             /* Find out whether this is a single entry or we have more than
2222                one consecutive entry.  */
2223             if (runp->mbnext != NULL
2224                 && runp->nmbs == runp->mbnext->nmbs
2225                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2226                 && (runp->mbs[runp->nmbs - 1]
2227                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2228               {
2229                 int i;
2230                 struct element_t *series_startp = runp;
2231                 struct element_t *curp;
2232
2233                 /* Compute how much space we will need.  The room made
                    here covers all the `_fast' additions below,
                    including the alignment bytes.  */
2234                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2235                                           + 2 * (runp->nmbs - 1));
2236                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2237                 obstack_make_room (&extrapool, added);
2238
2239                 /* More than one consecutive entry.  We mark this by having
2240                    a negative index into the indirect table.  */
2241                 obstack_int32_grow_fast (&extrapool,
2242                                          -(obstack_object_size (&indirectpool)
2243                                            / sizeof (int32_t)));
2244
2245                 /* Now search first the end of the series.  */
2246                 do
2247                   runp = runp->mbnext;
2248                 while (runp->mbnext != NULL
2249                        && runp->nmbs == runp->mbnext->nmbs
2250                        && memcmp (runp->mbs, runp->mbnext->mbs,
2251                                   runp->nmbs - 1) == 0
2252                        && (runp->mbs[runp->nmbs - 1]
2253                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2254
2255                 /* Now walk backward from here to the beginning.  */
2256                 curp = runp;
2257
                 /* The length is stored as NMBS - 1 in a single byte.  */
2258                 assert (runp->nmbs <= 256);
2259                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2260                 for (i = 1; i < curp->nmbs; ++i)
2261                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2262
2263                 /* Now find the end of the consecutive sequence and
2264                    add all the indices in the indirect pool.  */
2265                 do
2266                   {
2267                     weightidx = output_weight (&weightpool, collate, curp);
2268                     obstack_int32_grow (&indirectpool, weightidx);
2269
2270                     curp = curp->mblast;
2271                   }
2272                 while (curp != series_startp);
2273
2274                 /* Add the final weight.  */
2275                 weightidx = output_weight (&weightpool, collate, curp);
2276                 obstack_int32_grow (&indirectpool, weightidx);
2277
2278                 /* And add the end byte sequence.  Without length this
2279                    time.  */
2280                 for (i = 1; i < curp->nmbs; ++i)
2281                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2282               }
2283             else
2284               {
2285                 /* A single entry.  Simply add the index and the length and
2286                    string (except for the first character which is already
2287                    tested for).  */
2288                 int i;
2289
2290                 /* Output the weight info.  */
2291                 weightidx = output_weight (&weightpool, collate, runp);
2292
2293                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2294                                           + runp->nmbs - 1);
2295                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2296                 obstack_make_room (&extrapool, added);
2297
2298                 obstack_int32_grow_fast (&extrapool, weightidx);
                 /* The length is stored as NMBS - 1 in a single byte.  */
2299                 assert (runp->nmbs <= 256);
2300                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2301
2302                 for (i = 1; i < runp->nmbs; ++i)
2303                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2304               }
2305
2306             /* Add alignment bytes if necessary.  */
2307             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2308               obstack_1grow_fast (&extrapool, '\0');
2309
2310             /* Next entry.  */
2311             lastp = runp;
2312             runp = runp->mbnext;
2313           }
2314         while (runp != NULL);
2315
2316         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2317
2318         /* If the final entry in the list is not a single character we
2319            add an UNDEFINED entry here.  */
2320         if (lastp->nmbs != 1)
2321           {
2322             int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2323             obstack_make_room (&extrapool, added);
2324
2325             obstack_int32_grow_fast (&extrapool, 0);
2326             /* XXX What rule? We just pick the first.  */
2327             obstack_1grow_fast (&extrapool, 0);
2328             /* Length is zero.  */
2329             obstack_1grow_fast (&extrapool, 0);
2330
2331             /* Add alignment bytes if necessary.  */
2332             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2333               obstack_1grow_fast (&extrapool, '\0');
2334           }
2335       }
2336
2337   /* Add padding to the tables if necessary.  */
2338   while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2339     obstack_1grow (&weightpool, 0);
2340
2341   /* Now add the four tables.  */
2342   add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2343   add_locale_raw_obstack (&file, &weightpool);
2344   add_locale_raw_obstack (&file, &extrapool);
2345   add_locale_raw_obstack (&file, &indirectpool);
2346
2347   /* Now the same for the wide character table.  We need to store some
2348      more information here.  */
2349   add_locale_empty (&file);
2350   add_locale_empty (&file);
2351   add_locale_empty (&file);
2352
2353   /* Since we are using the sign of an integer to mark indirection the
2354      offsets in the arrays we are indirectly referring to must not be
2355      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2356   obstack_int32_grow (&extrapool, 0);
2357   obstack_int32_grow (&indirectpool, 0);
2358
2359   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2360      will probably be used more than once it is good to store the
2361      weights only once.  output_weightwc returns 0 for the UNDEFINED
     element; anything else is treated as a fatal internal error.  */
2362   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2363     abort ();
2364
2365   /* Generate the table.  Walk through the lists of sequences starting
2366      with the same wide character and add them one after the other to
2367      the table.  In case we have more than one sequence starting with
2368      the same byte we have to use extra indirection.  */
2369   tablewc.p = 6;
2370   tablewc.q = 10;
2371   collidx_table_init (&tablewc);
2372
     /* add_to_tablewc gets its pools and tables through `atwc'.  */
2373   atwc.weightpool = &weightpool;
2374   atwc.extrapool = &extrapool;
2375   atwc.indpool = &indirectpool;
2376   atwc.collate = collate;
2377   atwc.tablewc = &tablewc;
2378
2379   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2380
2381   memset (&atwc, 0, sizeof (atwc));
2382
2383   /* Now add the four tables.  */
2384   add_locale_collidx_table (&file, &tablewc);
2385   add_locale_raw_obstack (&file, &weightpool);
2386   add_locale_raw_obstack (&file, &extrapool);
2387   add_locale_raw_obstack (&file, &indirectpool);
2388
2389   /* Finally write the table with collation element names out.  It is
2390      a hash table with a simple function which gets the name of the
2391      character as the input.  One character might have many names.  The
2392      value associated with the name is an index into the weight table
2393      where we are then interested in the first-level weight value.
2394
2395      To determine how large the table should be we are counting the
2396      elements we have to put in.  Since we are using internal chaining
2397      using a secondary hash function we have to make the table a bit
2398      larger to avoid extremely long search times.  We can achieve
2399      good results with a 40% larger table than there are entries.  */
2400   elem_size = 0;
2401   runp = collate->start;
2402   while (runp != NULL)
2403     {
2404       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2405         /* Yep, the element really counts.  */
2406         ++elem_size;
2407
2408       runp = runp->next;
2409     }
2410   /* Add 40% and find the next prime number.  */
2411   elem_size = next_prime (elem_size * 1.4);
2412
2413   /* Allocate the table.  Each entry consists of two words: the hash
2414      value and an index in a secondary table which provides the index
2415      into the weight table and the string itself (so that a match can
2416      be determined).  */
2417   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2418                                            elem_size * 2 * sizeof (uint32_t));
2419   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2420
2421   /* Now add the elements.  The selection below is the same one used
     for counting above.  */
2422   runp = collate->start;
2423   while (runp != NULL)
2424     {
2425       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2426         {
2427           /* Compute the hash value of the name.  */
2428           uint32_t namelen = strlen (runp->name);
2429           uint32_t hash = elem_hash (runp->name, namelen);
2430           size_t idx = hash % elem_size;
2431 #ifndef NDEBUG
2432           size_t start_idx = idx;
2433 #endif
2434
           /* A zero hash word marks a free slot.  */
2435           if (elem_table[idx * 2] != 0)
2436             {
2437               /* The spot is already taken.  Try iterating using the value
2438                  from the secondary hashing function.  */
2439               size_t iter = hash % (elem_size - 2) + 1;
2440
2441               do
2442                 {
2443                   idx += iter;
2444                   if (idx >= elem_size)
2445                     idx -= elem_size;
2446                   assert (idx != start_idx);
2447                 }
2448               while (elem_table[idx * 2] != 0);
2449             }
2450           /* This is the spot where we will insert the value.  The second
              word is the current size of the object in the extra pool,
              i.e. the offset at which the data added below starts.  */
2451           elem_table[idx * 2] = hash;
2452           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2453
2454           /* The string itself including length.  */
2455           obstack_1grow (&extrapool, namelen);
2456           obstack_grow (&extrapool, runp->name, namelen);
2457
2458           /* And the multibyte representation.  */
2459           obstack_1grow (&extrapool, runp->nmbs);
2460           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2461
2462           /* And align again to 32 bits.  Between one and three bytes
              are needed; they are copied from the literal "\0\0" which,
              with its terminator, provides three NUL bytes.  */
2463           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2464             obstack_grow (&extrapool, "\0\0",
2465                           (sizeof (int32_t)
2466                            - ((1 + namelen + 1 + runp->nmbs)
2467                               % sizeof (int32_t))));
2468
2469           /* Now some 32-bit values: multibyte collation sequence,
2470              wide char string (including length), and wide char
2471              collation sequence.  */
2472           obstack_int32_grow (&extrapool, runp->mbseqorder);
2473
2474           obstack_int32_grow (&extrapool, runp->nwcs);
2475           obstack_grow (&extrapool, runp->wcs,
2476                         runp->nwcs * sizeof (uint32_t));
2477           maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2478
2479           obstack_int32_grow (&extrapool, runp->wcseqorder);
2480         }
2481
2482       runp = runp->next;
2483     }
2484
2485   /* Prepare to write out this data.  */
2486   add_locale_uint32 (&file, elem_size);
2487   add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2488   add_locale_raw_obstack (&file, &extrapool);
2489   add_locale_raw_data (&file, collate->mbseqorder, 256);
2490   add_locale_collseq_table (&file, &collate->wcseqorder);
2491   add_locale_string (&file, charmap->code_set_name);
2492   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2493
2494   obstack_free (&weightpool, NULL);
2495   obstack_free (&extrapool, NULL);
2496   obstack_free (&indirectpool, NULL);
2497 }
2498
2499
2500 static enum token_t
2501 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2502          const struct charmap_t *charmap, int to_endif)
2503 {
2504   while (1)
2505     {
2506       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2507       enum token_t nowtok = now->tok;
2508
2509       if (nowtok == tok_eof || nowtok == tok_end)
2510         return nowtok;
2511
2512       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2513         {
2514           lr_error (ldfile, _("%s: nested conditionals not supported"),
2515                     "LC_COLLATE");
2516           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2517           if (nowtok == tok_eof || nowtok == tok_end)
2518             return nowtok;
2519         }
2520       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2521         {
2522           lr_ignore_rest (ldfile, 1);
2523           return nowtok;
2524         }
2525       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2526         {
2527           /* Do not read the rest of the line.  */
2528           return nowtok;
2529         }
2530       else if (nowtok == tok_else)
2531         {
2532           lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2533         }
2534
2535       lr_ignore_rest (ldfile, 0);
2536     }
2537 }
2538
2539
2540 void
2541 collate_read (struct linereader *ldfile, struct localedef_t *result,
2542               const struct charmap_t *charmap, const char *repertoire_name,
2543               int ignore_content)
2544 {
2545   struct repertoire_t *repertoire = NULL;
2546   struct locale_collate_t *collate;
2547   struct token *now;
2548   struct token *arg = NULL;
2549   enum token_t nowtok;
2550   enum token_t was_ellipsis = tok_none;
2551   struct localedef_t *copy_locale = NULL;
2552   /* Parsing state:
2553      0 - start
2554      1 - between `order-start' and `order-end'
2555      2 - after `order-end'
2556      3 - after `reorder-after', waiting for `reorder-end'
2557      4 - after `reorder-end'
2558      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2559      6 - after `reorder-sections-end'
2560   */
2561   int state = 0;
2562
2563   /* Get the repertoire we have to use.  */
2564   if (repertoire_name != NULL)
2565     repertoire = repertoire_read (repertoire_name);
2566
2567   /* The rest of the line containing `LC_COLLATE' must be free.  */
2568   lr_ignore_rest (ldfile, 1);
2569
2570   while (1)
2571     {
2572       do
2573         {
2574           now = lr_token (ldfile, charmap, result, NULL, verbose);
2575           nowtok = now->tok;
2576         }
2577       while (nowtok == tok_eol);
2578
2579       if (nowtok != tok_define)
2580         break;
2581
2582       if (ignore_content)
2583         lr_ignore_rest (ldfile, 0);
2584       else
2585         {
2586           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2587           if (arg->tok != tok_ident)
2588             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2589           else
2590             {
2591               /* Simply add the new symbol.  */
2592               struct name_list *newsym = xmalloc (sizeof (*newsym)
2593                                                   + arg->val.str.lenmb + 1);
2594               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2595               newsym->str[arg->val.str.lenmb] = '\0';
2596               newsym->next = defined;
2597               defined = newsym;
2598
2599               lr_ignore_rest (ldfile, 1);
2600             }
2601         }
2602     }
2603
2604   if (nowtok == tok_copy)
2605     {
2606       now = lr_token (ldfile, charmap, result, NULL, verbose);
2607       if (now->tok != tok_string)
2608         {
2609           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2610
2611         skip_category:
2612           do
2613             now = lr_token (ldfile, charmap, result, NULL, verbose);
2614           while (now->tok != tok_eof && now->tok != tok_end);
2615
2616           if (now->tok != tok_eof
2617               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2618                   now->tok == tok_eof))
2619             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2620           else if (now->tok != tok_lc_collate)
2621             {
2622               lr_error (ldfile, _("\
2623 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2624               lr_ignore_rest (ldfile, 0);
2625             }
2626           else
2627             lr_ignore_rest (ldfile, 1);
2628
2629           return;
2630         }
2631
2632       if (! ignore_content)
2633         {
2634           /* Get the locale definition.  */
2635           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2636                                      repertoire_name, charmap, NULL);
2637           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2638             {
2639               /* Not yet loaded.  So do it now.  */
2640               if (locfile_read (copy_locale, charmap) != 0)
2641                 goto skip_category;
2642             }
2643
2644           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2645             return;
2646         }
2647
2648       lr_ignore_rest (ldfile, 1);
2649
2650       now = lr_token (ldfile, charmap, result, NULL, verbose);
2651       nowtok = now->tok;
2652     }
2653
2654   /* Prepare the data structures.  */
2655   collate_startup (ldfile, result, copy_locale, ignore_content);
2656   collate = result->categories[LC_COLLATE].collate;
2657
2658   while (1)
2659     {
2660       char ucs4buf[10];
2661       char *symstr;
2662       size_t symlen;
2663
2664       /* Of course we don't proceed beyond the end of file.  */
2665       if (nowtok == tok_eof)
2666         break;
2667
2668       /* Ignore empty lines.  */
2669       if (nowtok == tok_eol)
2670         {
2671           now = lr_token (ldfile, charmap, result, NULL, verbose);
2672           nowtok = now->tok;
2673           continue;
2674         }
2675
2676       switch (nowtok)
2677         {
2678         case tok_copy:
2679           /* Allow copying other locales.  */
2680           now = lr_token (ldfile, charmap, result, NULL, verbose);
2681           if (now->tok != tok_string)
2682             goto err_label;
2683
2684           if (! ignore_content)
2685             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2686                          charmap, result);
2687
2688           lr_ignore_rest (ldfile, 1);
2689           break;
2690
2691         case tok_coll_weight_max:
2692           /* Ignore the rest of the line if we don't need the input of
2693              this line.  */
2694           if (ignore_content)
2695             {
2696               lr_ignore_rest (ldfile, 0);
2697               break;
2698             }
2699
2700           if (state != 0)
2701             goto err_label;
2702
2703           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2704           if (arg->tok != tok_number)
2705             goto err_label;
2706           if (collate->col_weight_max != -1)
2707             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2708                       "LC_COLLATE", "col_weight_max");
2709           else
2710             collate->col_weight_max = arg->val.num;
2711           lr_ignore_rest (ldfile, 1);
2712           break;
2713
2714         case tok_section_symbol:
2715           /* Ignore the rest of the line if we don't need the input of
2716              this line.  */
2717           if (ignore_content)
2718             {
2719               lr_ignore_rest (ldfile, 0);
2720               break;
2721             }
2722
2723           if (state != 0)
2724             goto err_label;
2725
2726           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2727           if (arg->tok != tok_bsymbol)
2728             goto err_label;
2729           else if (!ignore_content)
2730             {
2731               /* Check whether this section is already known.  */
2732               struct section_list *known = collate->sections;
2733               while (known != NULL)
2734                 {
2735                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2736                     break;
2737                   known = known->next;
2738                 }
2739
2740               if (known != NULL)
2741                 {
2742                   lr_error (ldfile,
2743                             _("%s: duplicate declaration of section `%s'"),
2744                             "LC_COLLATE", arg->val.str.startmb);
2745                   free (arg->val.str.startmb);
2746                 }
2747               else
2748                 collate->sections = make_seclist_elem (collate,
2749                                                        arg->val.str.startmb,
2750                                                        collate->sections);
2751
2752               lr_ignore_rest (ldfile, known == NULL);
2753             }
2754           else
2755             {
2756               free (arg->val.str.startmb);
2757               lr_ignore_rest (ldfile, 0);
2758             }
2759           break;
2760
2761         case tok_collating_element:
2762           /* Ignore the rest of the line if we don't need the input of
2763              this line.  */
2764           if (ignore_content)
2765             {
2766               lr_ignore_rest (ldfile, 0);
2767               break;
2768             }
2769
2770           if (state != 0 && state != 2)
2771             goto err_label;
2772
2773           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2774           if (arg->tok != tok_bsymbol)
2775             goto err_label;
2776           else
2777             {
2778               const char *symbol = arg->val.str.startmb;
2779               size_t symbol_len = arg->val.str.lenmb;
2780
2781               /* Next the `from' keyword.  */
2782               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2783               if (arg->tok != tok_from)
2784                 {
2785                   free ((char *) symbol);
2786                   goto err_label;
2787                 }
2788
2789               ldfile->return_widestr = 1;
2790               ldfile->translate_strings = 1;
2791
2792               /* Finally the string with the replacement.  */
2793               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2794
2795               ldfile->return_widestr = 0;
2796               ldfile->translate_strings = 0;
2797
2798               if (arg->tok != tok_string)
2799                 goto err_label;
2800
2801               if (!ignore_content && symbol != NULL)
2802                 {
2803                   /* The name is already defined.  */
2804                   if (check_duplicate (ldfile, collate, charmap,
2805                                        repertoire, symbol, symbol_len))
2806                     goto col_elem_free;
2807
2808                   if (arg->val.str.startmb != NULL)
2809                     insert_entry (&collate->elem_table, symbol, symbol_len,
2810                                   new_element (collate,
2811                                                arg->val.str.startmb,
2812                                                arg->val.str.lenmb - 1,
2813                                                arg->val.str.startwc,
2814                                                symbol, symbol_len, 0));
2815                 }
2816               else
2817                 {
2818                 col_elem_free:
2819                   free ((char *) symbol);
2820                   free (arg->val.str.startmb);
2821                   free (arg->val.str.startwc);
2822                 }
2823               lr_ignore_rest (ldfile, 1);
2824             }
2825           break;
2826
2827         case tok_collating_symbol:
2828           /* Ignore the rest of the line if we don't need the input of
2829              this line.  */
2830           if (ignore_content)
2831             {
2832               lr_ignore_rest (ldfile, 0);
2833               break;
2834             }
2835
2836           if (state != 0 && state != 2)
2837             goto err_label;
2838
2839           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2840           if (arg->tok != tok_bsymbol)
2841             goto err_label;
2842           else
2843             {
2844               char *symbol = arg->val.str.startmb;
2845               size_t symbol_len = arg->val.str.lenmb;
2846               char *endsymbol = NULL;
2847               size_t endsymbol_len = 0;
2848               enum token_t ellipsis = tok_none;
2849
2850               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2851               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2852                 {
2853                   ellipsis = arg->tok;
2854
2855                   arg = lr_token (ldfile, charmap, result, repertoire,
2856                                   verbose);
2857                   if (arg->tok != tok_bsymbol)
2858                     {
2859                       free (symbol);
2860                       goto err_label;
2861                     }
2862
2863                   endsymbol = arg->val.str.startmb;
2864                   endsymbol_len = arg->val.str.lenmb;
2865
2866                   lr_ignore_rest (ldfile, 1);
2867                 }
2868               else if (arg->tok != tok_eol)
2869                 {
2870                   free (symbol);
2871                   goto err_label;
2872                 }
2873
2874               if (!ignore_content)
2875                 {
2876                   if (symbol == NULL
2877                       || (ellipsis != tok_none && endsymbol == NULL))
2878                     {
2879                       lr_error (ldfile, _("\
2880 %s: unknown character in collating symbol name"),
2881                                 "LC_COLLATE");
2882                       goto col_sym_free;
2883                     }
2884                   else if (ellipsis == tok_none)
2885                     {
2886                       /* A single symbol, no ellipsis.  */
2887                       if (check_duplicate (ldfile, collate, charmap,
2888                                            repertoire, symbol, symbol_len))
2889                         /* The name is already defined.  */
2890                         goto col_sym_free;
2891
2892                       insert_entry (&collate->sym_table, symbol, symbol_len,
2893                                     new_symbol (collate, symbol, symbol_len));
2894                     }
2895                   else if (symbol_len != endsymbol_len)
2896                     {
2897                     col_sym_inv_range:
2898                       lr_error (ldfile,
2899                                 _("invalid names for character range"));
2900                       goto col_sym_free;
2901                     }
2902                   else
2903                     {
2904                       /* Oh my, we have to handle an ellipsis.  First, as
2905                          usual, determine the common prefix and then
2906                          convert the rest into a range.  */
2907                       size_t prefixlen;
2908                       unsigned long int from;
2909                       unsigned long int to;
2910                       char *endp;
2911
2912                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2913                         if (symbol[prefixlen] != endsymbol[prefixlen])
2914                           break;
2915
2916                       /* Convert the rest into numbers.  */
2917                       symbol[symbol_len] = '\0';
2918                       from = strtoul (&symbol[prefixlen], &endp,
2919                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2920                       if (*endp != '\0')
2921                         goto col_sym_inv_range;
2922
2923                       endsymbol[symbol_len] = '\0';
2924                       to = strtoul (&endsymbol[prefixlen], &endp,
2925                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2926                       if (*endp != '\0')
2927                         goto col_sym_inv_range;
2928
2929                       if (from > to)
2930                         goto col_sym_inv_range;
2931
2932                       /* Now loop over all entries.  */
2933                       while (from <= to)
2934                         {
2935                           char *symbuf;
2936
2937                           symbuf = (char *) obstack_alloc (&collate->mempool,
2938                                                            symbol_len + 1);
2939
2940                           /* Create the name.  */
2941                           sprintf (symbuf,
2942                                    ellipsis == tok_ellipsis2
2943                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2944                                    (int) prefixlen, symbol,
2945                                    (int) (symbol_len - prefixlen), from);
2946
2947                           if (check_duplicate (ldfile, collate, charmap,
2948                                                repertoire, symbuf, symbol_len))
2949                             /* The name is already defined.  */
2950                             goto col_sym_free;
2951
2952                           insert_entry (&collate->sym_table, symbuf,
2953                                         symbol_len,
2954                                         new_symbol (collate, symbuf,
2955                                                     symbol_len));
2956
2957                           /* Increment the counter.  */
2958                           ++from;
2959                         }
2960
2961                       goto col_sym_free;
2962                     }
2963                 }
2964               else
2965                 {
2966                 col_sym_free:
2967                   free (symbol);
2968                   free (endsymbol);
2969                 }
2970             }
2971           break;
2972
2973         case tok_symbol_equivalence:
2974           /* Ignore the rest of the line if we don't need the input of
2975              this line.  */
2976           if (ignore_content)
2977             {
2978               lr_ignore_rest (ldfile, 0);
2979               break;
2980             }
2981
2982           if (state != 0)
2983             goto err_label;
2984
2985           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2986           if (arg->tok != tok_bsymbol)
2987             goto err_label;
2988           else
2989             {
2990               const char *newname = arg->val.str.startmb;
2991               size_t newname_len = arg->val.str.lenmb;
2992               const char *symname;
2993               size_t symname_len;
2994               void *symval;     /* Actually struct symbol_t*  */
2995
2996               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2997               if (arg->tok != tok_bsymbol)
2998                 {
2999                   free ((char *) newname);
3000                   goto err_label;
3001                 }
3002
3003               symname = arg->val.str.startmb;
3004               symname_len = arg->val.str.lenmb;
3005
3006               if (newname == NULL)
3007                 {
3008                   lr_error (ldfile, _("\
3009 %s: unknown character in equivalent definition name"),
3010                             "LC_COLLATE");
3011
3012                 sym_equiv_free:
3013                   free ((char *) newname);
3014                   free ((char *) symname);
3015                   break;
3016                 }
3017               if (symname == NULL)
3018                 {
3019                   lr_error (ldfile, _("\
3020 %s: unknown character in equivalent definition value"),
3021                             "LC_COLLATE");
3022                   goto sym_equiv_free;
3023                 }
3024
3025               /* See whether the symbol name is already defined.  */
3026               if (find_entry (&collate->sym_table, symname, symname_len,
3027                               &symval) != 0)
3028                 {
3029                   lr_error (ldfile, _("\
3030 %s: unknown symbol `%s' in equivalent definition"),
3031                             "LC_COLLATE", symname);
3032                   goto sym_equiv_free;
3033                 }
3034
3035               if (insert_entry (&collate->sym_table,
3036                                 newname, newname_len, symval) < 0)
3037                 {
3038                   lr_error (ldfile, _("\
3039 error while adding equivalent collating symbol"));
3040                   goto sym_equiv_free;
3041                 }
3042
3043               free ((char *) symname);
3044             }
3045           lr_ignore_rest (ldfile, 1);
3046           break;
3047
3048         case tok_script:
3049           /* Ignore the rest of the line if we don't need the input of
3050              this line.  */
3051           if (ignore_content)
3052             {
3053               lr_ignore_rest (ldfile, 0);
3054               break;
3055             }
3056
3057           /* We get told about the scripts we know.  */
3058           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3059           if (arg->tok != tok_bsymbol)
3060             goto err_label;
3061           else
3062             {
3063               struct section_list *runp = collate->known_sections;
3064               char *name;
3065
3066               while (runp != NULL)
3067                 if (strncmp (runp->name, arg->val.str.startmb,
3068                              arg->val.str.lenmb) == 0
3069                     && runp->name[arg->val.str.lenmb] == '\0')
3070                   break;
3071                 else
3072                   runp = runp->def_next;
3073
3074               if (runp != NULL)
3075                 {
3076                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3077                             runp->name);
3078                   lr_ignore_rest (ldfile, 0);
3079                   break;
3080                 }
3081
3082               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3083               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3084               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3085               name[arg->val.str.lenmb] = '\0';
3086               runp->name = name;
3087
3088               runp->def_next = collate->known_sections;
3089               collate->known_sections = runp;
3090             }
3091           lr_ignore_rest (ldfile, 1);
3092           break;
3093
3094         case tok_order_start:
3095           /* Ignore the rest of the line if we don't need the input of
3096              this line.  */
3097           if (ignore_content)
3098             {
3099               lr_ignore_rest (ldfile, 0);
3100               break;
3101             }
3102
3103           if (state != 0 && state != 1 && state != 2)
3104             goto err_label;
3105           state = 1;
3106
3107           /* The 14652 draft does not specify whether all `order_start' lines
3108              must contain the same number of sort-rules, but 14651 does.  So
3109              we require this here as well.  */
3110           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3111           if (arg->tok == tok_bsymbol)
3112             {
3113               /* This better should be a section name.  */
3114               struct section_list *sp = collate->known_sections;
3115               while (sp != NULL
3116                      && (sp->name == NULL
3117                          || strncmp (sp->name, arg->val.str.startmb,
3118                                      arg->val.str.lenmb) != 0
3119                          || sp->name[arg->val.str.lenmb] != '\0'))
3120                 sp = sp->def_next;
3121
3122               if (sp == NULL)
3123                 {
3124                   lr_error (ldfile, _("\
3125 %s: unknown section name `%.*s'"),
3126                             "LC_COLLATE", (int) arg->val.str.lenmb,
3127                             arg->val.str.startmb);
3128                   /* We use the error section.  */
3129                   collate->current_section = &collate->error_section;
3130
3131                   if (collate->error_section.first == NULL)
3132                     {
3133                       /* Insert &collate->error_section at the end of
3134                          the collate->sections list.  */
3135                       if (collate->sections == NULL)
3136                         collate->sections = &collate->error_section;
3137                       else
3138                         {
3139                           sp = collate->sections;
3140                           while (sp->next != NULL)
3141                             sp = sp->next;
3142
3143                           sp->next = &collate->error_section;
3144                         }
3145                       collate->error_section.next = NULL;
3146                     }
3147                 }
3148               else
3149                 {
3150                   /* One should not be allowed to open the same
3151                      section twice.  */
3152                   if (sp->first != NULL)
3153                     lr_error (ldfile, _("\
3154 %s: multiple order definitions for section `%s'"),
3155                               "LC_COLLATE", sp->name);
3156                   else
3157                     {
3158                       /* Insert sp in the collate->sections list,
3159                          right after collate->current_section.  */
3160                       if (collate->current_section != NULL)
3161                         {
3162                           sp->next = collate->current_section->next;
3163                           collate->current_section->next = sp;
3164                         }
3165                       else if (collate->sections == NULL)
3166                         /* This is the first section to be defined.  */
3167                         collate->sections = sp;
3168
3169                       collate->current_section = sp;
3170                     }
3171
3172                   /* Next should come the end of the line or a semicolon.  */
3173                   arg = lr_token (ldfile, charmap, result, repertoire,
3174                                   verbose);
3175                   if (arg->tok == tok_eol)
3176                     {
3177                       uint32_t cnt;
3178
3179                       /* This means we have exactly one rule: `forward'.  */
3180                       if (nrules > 1)
3181                         lr_error (ldfile, _("\
3182 %s: invalid number of sorting rules"),
3183                                   "LC_COLLATE");
3184                       else
3185                         nrules = 1;
3186                       sp->rules = obstack_alloc (&collate->mempool,
3187                                                  (sizeof (enum coll_sort_rule)
3188                                                   * nrules));
3189                       for (cnt = 0; cnt < nrules; ++cnt)
3190                         sp->rules[cnt] = sort_forward;
3191
3192                       /* Next line.  */
3193                       break;
3194                     }
3195
3196                   /* Get the next token.  */
3197                   arg = lr_token (ldfile, charmap, result, repertoire,
3198                                   verbose);
3199                 }
3200             }
3201           else
3202             {
3203               /* There is no section symbol.  Therefore we use the unnamed
3204                  section.  */
3205               collate->current_section = &collate->unnamed_section;
3206
3207               if (collate->unnamed_section_defined)
3208                 lr_error (ldfile, _("\
3209 %s: multiple order definitions for unnamed section"),
3210                           "LC_COLLATE");
3211               else
3212                 {
3213                   /* Insert &collate->unnamed_section at the beginning of
3214                      the collate->sections list.  */
3215                   collate->unnamed_section.next = collate->sections;
3216                   collate->sections = &collate->unnamed_section;
3217                   collate->unnamed_section_defined = true;
3218                 }
3219             }
3220
3221           /* Now read the direction names.  */
3222           read_directions (ldfile, arg, charmap, repertoire, result);
3223
3224           /* From now we need the strings untranslated.  */
3225           ldfile->translate_strings = 0;
3226           break;
3227
3228         case tok_order_end:
3229           /* Ignore the rest of the line if we don't need the input of
3230              this line.  */
3231           if (ignore_content)
3232             {
3233               lr_ignore_rest (ldfile, 0);
3234               break;
3235             }
3236
3237           if (state != 1)
3238             goto err_label;
3239
3240           /* Handle ellipsis at end of list.  */
3241           if (was_ellipsis != tok_none)
3242             {
3243               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3244                                repertoire, result);
3245               was_ellipsis = tok_none;
3246             }
3247
3248           state = 2;
3249           lr_ignore_rest (ldfile, 1);
3250           break;
3251
3252         case tok_reorder_after:
3253           /* Ignore the rest of the line if we don't need the input of
3254              this line.  */
3255           if (ignore_content)
3256             {
3257               lr_ignore_rest (ldfile, 0);
3258               break;
3259             }
3260
3261           if (state == 1)
3262             {
3263               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3264                         "LC_COLLATE");
3265               state = 2;
3266
3267               /* Handle ellipsis at end of list.  */
3268               if (was_ellipsis != tok_none)
3269                 {
3270                   handle_ellipsis (ldfile, arg->val.str.startmb,
3271                                    arg->val.str.lenmb, was_ellipsis, charmap,
3272                                    repertoire, result);
3273                   was_ellipsis = tok_none;
3274                 }
3275             }
3276           else if (state == 0 && copy_locale == NULL)
3277             goto err_label;
3278           else if (state != 0 && state != 2 && state != 3)
3279             goto err_label;
3280           state = 3;
3281
3282           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3283           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3284             {
3285               /* Find this symbol in the sequence table.  */
3286               char ucsbuf[10];
3287               char *startmb;
3288               size_t lenmb;
3289               struct element_t *insp;
3290               int no_error = 1;
3291               void *ptr;
3292
3293               if (arg->tok == tok_bsymbol)
3294                 {
3295                   startmb = arg->val.str.startmb;
3296                   lenmb = arg->val.str.lenmb;
3297                 }
3298               else
3299                 {
3300                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3301                   startmb = ucsbuf;
3302                   lenmb = 9;
3303                 }
3304
3305               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3306                 /* Yes, the symbol exists.  Simply point the cursor
3307                    to it.  */
3308                 collate->cursor = (struct element_t *) ptr;
3309               else
3310                 {
3311                   struct symbol_t *symbp;
3312                   void *ptr;
3313
3314                   if (find_entry (&collate->sym_table, startmb, lenmb,
3315                                   &ptr) == 0)
3316                     {
3317                       symbp = ptr;
3318
3319                       if (symbp->order->last != NULL
3320                           || symbp->order->next != NULL)
3321                         collate->cursor = symbp->order;
3322                       else
3323                         {
3324                           /* This is a collating symbol but its position
3325                              is not yet defined.  */
3326                           lr_error (ldfile, _("\
3327 %s: order for collating symbol %.*s not yet defined"),
3328                                     "LC_COLLATE", (int) lenmb, startmb);
3329                           collate->cursor = NULL;
3330                           no_error = 0;
3331                         }
3332                     }
3333                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3334                                        &ptr) == 0)
3335                     {
3336                       insp = (struct element_t *) ptr;
3337
3338                       if (insp->last != NULL || insp->next != NULL)
3339                         collate->cursor = insp;
3340                       else
3341                         {
3342                           /* This is a collating element but its position
3343                              is not yet defined.  */
3344                           lr_error (ldfile, _("\
3345 %s: order for collating element %.*s not yet defined"),
3346                                     "LC_COLLATE", (int) lenmb, startmb);
3347                           collate->cursor = NULL;
3348                           no_error = 0;
3349                         }
3350                     }
3351                   else
3352                     {
3353                       /* This is bad.  The symbol after which we have to
3354                          insert does not exist.  */
3355                       lr_error (ldfile, _("\
3356 %s: cannot reorder after %.*s: symbol not known"),
3357                                 "LC_COLLATE", (int) lenmb, startmb);
3358                       collate->cursor = NULL;
3359                       no_error = 0;
3360                     }
3361                 }
3362
3363               lr_ignore_rest (ldfile, no_error);
3364             }
3365           else
3366             /* This must not happen.  */
3367             goto err_label;
3368           break;
3369
3370         case tok_reorder_end:
3371           /* Ignore the rest of the line if we don't need the input of
3372              this line.  */
3373           if (ignore_content)
3374             break;
3375
3376           if (state != 3)
3377             goto err_label;
3378           state = 4;
3379           lr_ignore_rest (ldfile, 1);
3380           break;
3381
3382         case tok_reorder_sections_after:
3383           /* Ignore the rest of the line if we don't need the input of
3384              this line.  */
3385           if (ignore_content)
3386             {
3387               lr_ignore_rest (ldfile, 0);
3388               break;
3389             }
3390
3391           if (state == 1)
3392             {
3393               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3394                         "LC_COLLATE");
3395               state = 2;
3396
3397               /* Handle ellipsis at end of list.  */
3398               if (was_ellipsis != tok_none)
3399                 {
3400                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3401                                    repertoire, result);
3402                   was_ellipsis = tok_none;
3403                 }
3404             }
3405           else if (state == 3)
3406             {
3407               WITH_CUR_LOCALE (error (0, 0, _("\
3408 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3409               state = 4;
3410             }
3411           else if (state != 2 && state != 4)
3412             goto err_label;
3413           state = 5;
3414
3415           /* Get the name of the sections we are adding after.  */
3416           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3417           if (arg->tok == tok_bsymbol)
3418             {
3419               /* Now find a section with this name.  */
3420               struct section_list *runp = collate->sections;
3421
3422               while (runp != NULL)
3423                 {
3424                   if (runp->name != NULL
3425                       && strlen (runp->name) == arg->val.str.lenmb
3426                       && memcmp (runp->name, arg->val.str.startmb,
3427                                  arg->val.str.lenmb) == 0)
3428                     break;
3429
3430                   runp = runp->next;
3431                 }
3432
3433               if (runp != NULL)
3434                 collate->current_section = runp;
3435               else
3436                 {
3437                   /* This is bad.  The section after which we have to
3438                      reorder does not exist.  Therefore we cannot
3439                      process the whole rest of this reorder
3440                      specification.  */
3441                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3442                             "LC_COLLATE", (int) arg->val.str.lenmb,
3443                             arg->val.str.startmb);
3444
3445                   do
3446                     {
3447                       lr_ignore_rest (ldfile, 0);
3448
3449                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3450                     }
3451                   while (now->tok == tok_reorder_sections_after
3452                          || now->tok == tok_reorder_sections_end
3453                          || now->tok == tok_end);
3454
3455                   /* Process the token we just saw.  */
3456                   nowtok = now->tok;
3457                   continue;
3458                 }
3459             }
3460           else
3461             /* This must not happen.  */
3462             goto err_label;
3463           break;
3464
3465         case tok_reorder_sections_end:
3466           /* Ignore the rest of the line if we don't need the input of
3467              this line.  */
3468           if (ignore_content)
3469             break;
3470
3471           if (state != 5)
3472             goto err_label;
3473           state = 6;
3474           lr_ignore_rest (ldfile, 1);
3475           break;
3476
3477         case tok_bsymbol:
3478         case tok_ucs4:
3479           /* Ignore the rest of the line if we don't need the input of
3480              this line.  */
3481           if (ignore_content)
3482             {
3483               lr_ignore_rest (ldfile, 0);
3484               break;
3485             }
3486
3487           if (state != 0 && state != 1 && state != 3 && state != 5)
3488             goto err_label;
3489
3490           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3491             goto err_label;
3492
3493           if (nowtok == tok_ucs4)
3494             {
3495               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3496               symstr = ucs4buf;
3497               symlen = 9;
3498             }
3499           else if (arg != NULL)
3500             {
3501               symstr = arg->val.str.startmb;
3502               symlen = arg->val.str.lenmb;
3503             }
3504           else
3505             {
3506               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3507                         (int) ldfile->token.val.str.lenmb,
3508                         ldfile->token.val.str.startmb);
3509               break;
3510             }
3511
3512           struct element_t *seqp;
3513           if (state == 0)
3514             {
3515               /* We are outside an `order_start' region.  This means
3516                  we must only accept definitions of values for
3517                  collation symbols since these are purely abstract
3518                  values and don't need directions associated.  */
3519               void *ptr;
3520
3521               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3522                 {
3523                   seqp = ptr;
3524
3525                   /* It's already defined.  First check whether this
3526                      is really a collating symbol.  */
3527                   if (seqp->is_character)
3528                     goto err_label;
3529
3530                   goto move_entry;
3531                 }
3532               else
3533                 {
3534                   void *result;
3535
3536                   if (find_entry (&collate->sym_table, symstr, symlen,
3537                                   &result) != 0)
3538                     /* No collating symbol, it's an error.  */
3539                     goto err_label;
3540
3541                   /* Maybe this is the first time we define a symbol
3542                      value and it is before the first actual section.  */
3543                   if (collate->sections == NULL)
3544                     collate->sections = collate->current_section =
3545                       &collate->symbol_section;
3546                 }
3547
3548               if (was_ellipsis != tok_none)
3549                 {
3550                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3551                                    charmap, repertoire, result);
3552
3553                   /* Remember that we processed the ellipsis.  */
3554                   was_ellipsis = tok_none;
3555
3556                   /* And don't add the value a second time.  */
3557                   break;
3558                 }
3559             }
3560           else if (state == 3)
3561             {
3562               /* It is possible that we already have this collation sequence.
3563                  In this case we move the entry.  */
3564               void *sym;
3565               void *ptr;
3566
3567               /* If the symbol after which we have to insert was not found
3568                  ignore all entries.  */
3569               if (collate->cursor == NULL)
3570                 {
3571                   lr_ignore_rest (ldfile, 0);
3572                   break;
3573                 }
3574
3575               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3576                 {
3577                   seqp = (struct element_t *) ptr;
3578                   goto move_entry;
3579                 }
3580
3581               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3582                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3583                 goto move_entry;
3584
3585               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3586                   && (seqp = (struct element_t *) ptr,
3587                       seqp->last != NULL || seqp->next != NULL
3588                       || (collate->start != NULL && seqp == collate->start)))
3589                 {
3590                 move_entry:
3591                   /* Remove the entry from the old position.  */
3592                   if (seqp->last == NULL)
3593                     collate->start = seqp->next;
3594                   else
3595                     seqp->last->next = seqp->next;
3596                   if (seqp->next != NULL)
3597                     seqp->next->last = seqp->last;
3598
3599                   /* We also have to check whether this entry is the
3600                      first or last of a section.  */
3601                   if (seqp->section->first == seqp)
3602                     {
3603                       if (seqp->section->first == seqp->section->last)
3604                         /* This section has no content anymore.  */
3605                         seqp->section->first = seqp->section->last = NULL;
3606                       else
3607                         seqp->section->first = seqp->next;
3608                     }
3609                   else if (seqp->section->last == seqp)
3610                     seqp->section->last = seqp->last;
3611
3612                   /* Now insert it in the new place.  */
3613                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3614                                   tok_none);
3615                   break;
3616                 }
3617
3618               /* Otherwise we just add a new entry.  */
3619             }
3620           else if (state == 5)
3621             {
3622               /* We are reordering sections.  Find the named section.  */
3623               struct section_list *runp = collate->sections;
3624               struct section_list *prevp = NULL;
3625
3626               while (runp != NULL)
3627                 {
3628                   if (runp->name != NULL
3629                       && strlen (runp->name) == symlen
3630                       && memcmp (runp->name, symstr, symlen) == 0)
3631                     break;
3632
3633                   prevp = runp;
3634                   runp = runp->next;
3635                 }
3636
3637               if (runp == NULL)
3638                 {
3639                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3640                             "LC_COLLATE", (int) symlen, symstr);
3641                   lr_ignore_rest (ldfile, 0);
3642                 }
3643               else
3644                 {
3645                   if (runp != collate->current_section)
3646                     {
3647                       /* Remove the named section from the old place and
3648                          insert it in the new one.  */
3649                       prevp->next = runp->next;
3650
3651                       runp->next = collate->current_section->next;
3652                       collate->current_section->next = runp;
3653                       collate->current_section = runp;
3654                     }
3655
3656                   /* Process the rest of the line which might change
3657                      the collation rules.  */
3658                   arg = lr_token (ldfile, charmap, result, repertoire,
3659                                   verbose);
3660                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3661                     read_directions (ldfile, arg, charmap, repertoire,
3662                                      result);
3663                 }
3664               break;
3665             }
3666           else if (was_ellipsis != tok_none)
3667             {
3668               /* Using the information in the `ellipsis_weight'
3669                  element and this and the last value we have to handle
3670                  the ellipsis now.  */
3671               assert (state == 1);
3672
3673               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3674                                repertoire, result);
3675
3676               /* Remember that we processed the ellipsis.  */
3677               was_ellipsis = tok_none;
3678
3679               /* And don't add the value a second time.  */
3680               break;
3681             }
3682
3683           /* Now insert in the new place.  */
3684           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3685           break;
3686
3687         case tok_undefined:
3688           /* Ignore the rest of the line if we don't need the input of
3689              this line.  */
3690           if (ignore_content)
3691             {
3692               lr_ignore_rest (ldfile, 0);
3693               break;
3694             }
3695
3696           if (state != 1)
3697             goto err_label;
3698
3699           if (was_ellipsis != tok_none)
3700             {
3701               lr_error (ldfile,
3702                         _("%s: cannot have `%s' as end of ellipsis range"),
3703                         "LC_COLLATE", "UNDEFINED");
3704
3705               unlink_element (collate);
3706               was_ellipsis = tok_none;
3707             }
3708
3709           /* See whether UNDEFINED already appeared somewhere.  */
3710           if (collate->undefined.next != NULL
3711               || &collate->undefined == collate->cursor)
3712             {
3713               lr_error (ldfile,
3714                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3715                         "LC_COLLATE", 9, "UNDEFINED",
3716                         collate->undefined.file,
3717                         collate->undefined.line);
3718               lr_ignore_rest (ldfile, 0);
3719             }
3720           else
3721             /* Parse the weights.  */
3722              insert_weights (ldfile, &collate->undefined, charmap,
3723                              repertoire, result, tok_none);
3724           break;
3725
3726         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3727         case tok_ellipsis3: /* absolute ellipsis */
3728         case tok_ellipsis4: /* symbolic decimal ellipsis */
3729           /* This is the symbolic (decimal or hexadecimal) or absolute
3730              ellipsis.  */
3731           if (was_ellipsis != tok_none)
3732             goto err_label;
3733
3734           if (state != 0 && state != 1 && state != 3)
3735             goto err_label;
3736
3737           was_ellipsis = nowtok;
3738
3739           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3740                           repertoire, result, nowtok);
3741           break;
3742
3743         case tok_end:
3744         seen_end:
3745           /* Next we assume `LC_COLLATE'.  */
3746           if (!ignore_content)
3747             {
3748               if (state == 0 && copy_locale == NULL)
3749                 /* We must either see a copy statement or have
3750                    ordering values.  */
3751                 lr_error (ldfile,
3752                           _("%s: empty category description not allowed"),
3753                           "LC_COLLATE");
3754               else if (state == 1)
3755                 {
3756                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3757                             "LC_COLLATE");
3758
3759                   /* Handle ellipsis at end of list.  */
3760                   if (was_ellipsis != tok_none)
3761                     {
3762                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3763                                        repertoire, result);
3764                       was_ellipsis = tok_none;
3765                     }
3766                 }
3767               else if (state == 3)
3768                 WITH_CUR_LOCALE (error (0, 0, _("\
3769 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3770               else if (state == 5)
3771                 WITH_CUR_LOCALE (error (0, 0, _("\
3772 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3773             }
3774           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3775           if (arg->tok == tok_eof)
3776             break;
3777           if (arg->tok == tok_eol)
3778             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3779           else if (arg->tok != tok_lc_collate)
3780             lr_error (ldfile, _("\
3781 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3782           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3783           return;
3784
3785         case tok_define:
3786           if (ignore_content)
3787             {
3788               lr_ignore_rest (ldfile, 0);
3789               break;
3790             }
3791
3792           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3793           if (arg->tok != tok_ident)
3794             goto err_label;
3795
3796           /* Simply add the new symbol.  */
3797           struct name_list *newsym = xmalloc (sizeof (*newsym)
3798                                               + arg->val.str.lenmb + 1);
3799           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3800           newsym->str[arg->val.str.lenmb] = '\0';
3801           newsym->next = defined;
3802           defined = newsym;
3803
3804           lr_ignore_rest (ldfile, 1);
3805           break;
3806
3807         case tok_undef:
3808           if (ignore_content)
3809             {
3810               lr_ignore_rest (ldfile, 0);
3811               break;
3812             }
3813
3814           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3815           if (arg->tok != tok_ident)
3816             goto err_label;
3817
3818           /* Remove _all_ occurrences of the symbol from the list.  */
3819           struct name_list *prevdef = NULL;
3820           struct name_list *curdef = defined;
3821           while (curdef != NULL)
3822             if (strncmp (arg->val.str.startmb, curdef->str,
3823                          arg->val.str.lenmb) == 0
3824                 && curdef->str[arg->val.str.lenmb] == '\0')
3825               {
3826                 if (prevdef == NULL)
3827                   defined = curdef->next;
3828                 else
3829                   prevdef->next = curdef->next;
3830
3831                 struct name_list *olddef = curdef;
3832                 curdef = curdef->next;
3833
3834                 free (olddef);
3835               }
3836             else
3837               {
3838                 prevdef = curdef;
3839                 curdef = curdef->next;
3840               }
3841
3842           lr_ignore_rest (ldfile, 1);
3843           break;
3844
3845         case tok_ifdef:
3846         case tok_ifndef:
3847           if (ignore_content)
3848             {
3849               lr_ignore_rest (ldfile, 0);
3850               break;
3851             }
3852
3853         found_ifdef:
3854           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3855           if (arg->tok != tok_ident)
3856             goto err_label;
3857           lr_ignore_rest (ldfile, 1);
3858
3859           if (collate->else_action == else_none)
3860             {
3861               curdef = defined;
3862               while (curdef != NULL)
3863                 if (strncmp (arg->val.str.startmb, curdef->str,
3864                              arg->val.str.lenmb) == 0
3865                     && curdef->str[arg->val.str.lenmb] == '\0')
3866                   break;
3867                 else
3868                   curdef = curdef->next;
3869
3870               if ((nowtok == tok_ifdef && curdef != NULL)
3871                   || (nowtok == tok_ifndef && curdef == NULL))
3872                 {
3873                   /* We have to use the if-branch.  */
3874                   collate->else_action = else_ignore;
3875                 }
3876               else
3877                 {
3878                   /* We have to use the else-branch, if there is one.  */
3879                   nowtok = skip_to (ldfile, collate, charmap, 0);
3880                   if (nowtok == tok_else)
3881                     collate->else_action = else_seen;
3882                   else if (nowtok == tok_elifdef)
3883                     {
3884                       nowtok = tok_ifdef;
3885                       goto found_ifdef;
3886                     }
3887                   else if (nowtok == tok_elifndef)
3888                     {
3889                       nowtok = tok_ifndef;
3890                       goto found_ifdef;
3891                     }
3892                   else if (nowtok == tok_eof)
3893                     goto seen_eof;
3894                   else if (nowtok == tok_end)
3895                     goto seen_end;
3896                 }
3897             }
3898           else
3899             {
3900               /* XXX Should it really become necessary to support nested
3901                  preprocessor handling we will push the state here.  */
3902               lr_error (ldfile, _("%s: nested conditionals not supported"),
3903                         "LC_COLLATE");
3904               nowtok = skip_to (ldfile, collate, charmap, 1);
3905               if (nowtok == tok_eof)
3906                 goto seen_eof;
3907               else if (nowtok == tok_end)
3908                 goto seen_end;
3909             }
3910           break;
3911
3912         case tok_elifdef:
3913         case tok_elifndef:
3914         case tok_else:
3915           if (ignore_content)
3916             {
3917               lr_ignore_rest (ldfile, 0);
3918               break;
3919             }
3920
3921           lr_ignore_rest (ldfile, 1);
3922
3923           if (collate->else_action == else_ignore)
3924             {
3925               /* Ignore everything until the endif.  */
3926               nowtok = skip_to (ldfile, collate, charmap, 1);
3927               if (nowtok == tok_eof)
3928                 goto seen_eof;
3929               else if (nowtok == tok_end)
3930                 goto seen_end;
3931             }
3932           else
3933             {
3934               assert (collate->else_action == else_none);
3935               lr_error (ldfile, _("\
3936 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3937                         nowtok == tok_else ? "else"
3938                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3939             }
3940           break;
3941
3942         case tok_endif:
3943           if (ignore_content)
3944             {
3945               lr_ignore_rest (ldfile, 0);
3946               break;
3947             }
3948
3949           lr_ignore_rest (ldfile, 1);
3950
3951           if (collate->else_action != else_ignore
3952               && collate->else_action != else_seen)
3953             lr_error (ldfile, _("\
3954 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3955
3956           /* XXX If we support nested preprocessor directives we pop
3957              the state here.  */
3958           collate->else_action = else_none;
3959           break;
3960
3961         default:
3962         err_label:
3963           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3964         }
3965
3966       /* Prepare for the next round.  */
3967       now = lr_token (ldfile, charmap, result, NULL, verbose);
3968       nowtok = now->tok;
3969     }
3970
3971  seen_eof:
3972   /* When we come here we reached the end of the file.  */
3973   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3974 }