locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2014 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <errno.h>
  23 #include <error.h>
  24 #include <stdlib.h>
  25 #include <wchar.h>
  26 #include <stdint.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
  40 #define obstack_chunk_alloc malloc
  41 #define obstack_chunk_free free
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  48   data = maybe_swap_uint32 (data);
  49   if (sizeof (int32_t) == sizeof (int))
  50     obstack_int_grow (obstack, data);
  51   else
  52     obstack_grow (obstack, &data, sizeof (int32_t));
  53 }
  54
  55 static inline void
  56 __attribute ((always_inline))
  57 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  58 {
  59   assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  60   data = maybe_swap_uint32 (data);
  61   if (sizeof (int32_t) == sizeof (int))
  62     obstack_int_grow_fast (obstack, data);
  63   else
  64     obstack_grow (obstack, &data, sizeof (int32_t));
  65 }
  66
/* Forward declaration.  */
struct element_t;

/* Description of one section of the collation definition.  A section
   can be a member of two singly linked lists at the same time (see
   DEF_NEXT and NEXT) and remembers the first and the last element
   that was placed in it.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  read_directions stores an
     array with one entry per sorting level here.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
  88
struct element_t;

/* A counted array of element pointers.  insert_weights allocates one
   of these per sorting level for every element to hold the weights of
   that level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT entries.  Besides real elements an entry can be a null
     pointer (the level is ignored) or one of the ELEMENT_ELLIPSIS*
     placeholder values.  */
  struct element_t **w;
};
  98
/* Data type for collating element.  Instances are created by
   new_element, which fills in the name and the byte/wide character
   sequences and clears every link; insert_weights later links the
   element into the order list and attaches its weights.  */
struct element_t
{
  /* Name of the element, or NULL if it has none.  */
  const char *name;

  /* Multibyte sequence and its length in bytes; NULL and 0 if the
     element has no multibyte form.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character sequence and its length in characters; NULL and 0
     if the element has no wide character form.  */
  const uint32_t *wcs;
  size_t nwcs;
  /* NOTE(review): presumably the collation order values used for the
     multibyte and wide character output; new_element only clears
     them -- confirm where they are assigned.  */
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array of weight lists, one per sorting level; NULL until
     insert_weights allocates it.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
 148
/* Special element value.  These small integers cast to pointers never
   refer to a real element; they are stored in weight lists as
   placeholders (insert_weights uses ELEMENT_ELLIPSIS2 for levels
   whose weight depends on the element of an ellipsis range).  They
   must not be dereferenced.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 153
/* Data type for collating symbol.  Created by new_symbol with ORDER,
   FILE and LINE cleared.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL as long as the symbol has
     no position; find_element creates an anonymous element on first
     use.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
 166
/* Sparse table of struct element_t *.  The macros defined before each
   inclusion of 3level.h parameterize that header: TABLE is the name of
   the table type, ELEMENT the type of the stored values and DEFAULT
   the default value.  struct wchead_table is the type of the
   `wcheads' member of struct locale_collate_t.
   NOTE(review): ITERATE and NO_ADD_LOCALE presumably switch optional
   parts of the template on or off -- confirm in 3level.h.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  The default value has all bits set.
   struct collseq_table is the type of the `wcseqorder' member of
   struct locale_collate_t.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 186
 187
 188 /* Simple name list for the preprocessor.  */
 189 struct name_list
 190 {
 191   struct name_list *next;
 192   char str[0];
 193 };
 194
 195
/* The real definition of the struct for the LC_COLLATE locale.  It
   holds everything collected while reading one LC_COLLATE
   description: the sections, the order list of elements, the lookup
   tables for names and the memory pool all of them are allocated
   from.  */
struct locale_collate_t
{
  /* NOTE(review): presumably the maximal number of weights per
     collating element -- not used in the visible part of the file,
     confirm.  */
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  New elements are assigned to
     this section.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links every new element right behind the cursor and then advances
     the cursor to that element.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Pool from which sections, elements, symbols and weight lists are
     allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the element lists for the wide
     character sequences.
     NOTE(review): presumably indexed by the first wide character, in
     analogy to MBHEADS -- confirm.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
 267
 268
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting levels.  Zero until read_directions has processed
   its first specification; from then on every further specification
   and every weight list is sized according to this value.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
 275
 276
 277 /* We need UTF-8 encoding of numbers.  */
 278 static inline int
 279 __attribute ((always_inline))
 280 utf8_encode (char *buf, int val)
 281 {
 282   int retval;
 283
 284   if (val < 0x80)
 285     {
 286       *buf++ = (char) val;
 287       retval = 1;
 288     }
 289   else
 290     {
 291       int step;
 292
 293       for (step = 2; step < 6; ++step)
 294         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 295           break;
 296       retval = step;
 297
 298       *buf = (unsigned char) (~0xff >> step);
 299       --step;
 300       do
 301         {
 302           buf[step] = 0x80 | (val & 0x3f);
 303           val >>= 6;
 304         }
 305       while (--step > 0);
 306       *buf |= val;
 307     }
 308
 309   return retval;
 310 }
 311
 312
 313 static struct section_list *
 314 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 315                    struct section_list *next)
 316 {
 317   struct section_list *newp;
 318
 319   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 320                                                 sizeof (*newp));
 321   newp->next = next;
 322   newp->name = string;
 323   newp->first = NULL;
 324   newp->last = NULL;
 325
 326   return newp;
 327 }
 328
 329
 330 static struct element_t *
 331 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 332              const uint32_t *wcs, const char *name, size_t namelen,
 333              int is_character)
 334 {
 335   struct element_t *newp;
 336
 337   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 338                                              sizeof (*newp));
 339   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 340                                                     name, namelen);
 341   if (mbs != NULL)
 342     {
 343       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 344       newp->nmbs = mbslen;
 345     }
 346   else
 347     {
 348       newp->mbs = NULL;
 349       newp->nmbs = 0;
 350     }
 351   if (wcs != NULL)
 352     {
 353       size_t nwcs = wcslen ((wchar_t *) wcs);
 354       uint32_t zero = 0;
 355       /* Handle <U0000> as a single character.  */
 356       if (nwcs == 0)
 357         nwcs = 1;
 358       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 359       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 360       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 361       newp->nwcs = nwcs;
 362     }
 363   else
 364     {
 365       newp->wcs = NULL;
 366       newp->nwcs = 0;
 367     }
 368   newp->mborder = NULL;
 369   newp->wcorder = 0;
 370   newp->used_in_level = 0;
 371   newp->is_character = is_character;
 372
 373   /* Will be assigned later.  XXX  */
 374   newp->mbseqorder = 0;
 375   newp->wcseqorder = 0;
 376
 377   /* Will be allocated later.  */
 378   newp->weights = NULL;
 379
 380   newp->file = NULL;
 381   newp->line = 0;
 382
 383   newp->section = collate->current_section;
 384
 385   newp->last = NULL;
 386   newp->next = NULL;
 387
 388   newp->mbnext = NULL;
 389   newp->mblast = NULL;
 390
 391   newp->wcnext = NULL;
 392   newp->wclast = NULL;
 393
 394   return newp;
 395 }
 396
 397
 398 static struct symbol_t *
 399 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 400 {
 401   struct symbol_t *newp;
 402
 403   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 404
 405   newp->name = obstack_copy0 (&collate->mempool, name, len);
 406   newp->order = NULL;
 407
 408   newp->file = NULL;
 409   newp->line = 0;
 410
 411   return newp;
 412 }
 413
 414
 415 /* Test whether this name is already defined somewhere.  */
 416 static int
 417 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 418                  const struct charmap_t *charmap,
 419                  struct repertoire_t *repertoire, const char *symbol,
 420                  size_t symbol_len)
 421 {
 422   void *ignore = NULL;
 423
 424   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 425     {
 426       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 427                 (int) symbol_len, symbol);
 428       return 1;
 429     }
 430
 431   if (repertoire != NULL
 432       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 433           == 0))
 434     {
 435       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 436                 (int) symbol_len, symbol);
 437       return 1;
 438     }
 439
 440   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 441     {
 442       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 443                 (int) symbol_len, symbol);
 444       return 1;
 445     }
 446
 447   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 448     {
 449       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 450                 (int) symbol_len, symbol);
 451       return 1;
 452     }
 453
 454   return 0;
 455 }
 456
 457
 458 /* Read the direction specification.  */
 459 static void
 460 read_directions (struct linereader *ldfile, struct token *arg,
 461                  const struct charmap_t *charmap,
 462                  struct repertoire_t *repertoire, struct localedef_t *result)
 463 {
 464   int cnt = 0;
 465   int max = nrules ?: 10;
 466   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 467   int warned = 0;
 468   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 469
 470   while (1)
 471     {
 472       int valid = 0;
 473
 474       if (arg->tok == tok_forward)
 475         {
 476           if (rules[cnt] & sort_backward)
 477             {
 478               if (! warned)
 479                 {
 480                   lr_error (ldfile, _("\
 481 %s: `forward' and `backward' are mutually excluding each other"),
 482                             "LC_COLLATE");
 483                   warned = 1;
 484                 }
 485             }
 486           else if (rules[cnt] & sort_forward)
 487             {
 488               if (! warned)
 489                 {
 490                   lr_error (ldfile, _("\
 491 %s: `%s' mentioned more than once in definition of weight %d"),
 492                             "LC_COLLATE", "forward", cnt + 1);
 493                 }
 494             }
 495           else
 496             rules[cnt] |= sort_forward;
 497
 498           valid = 1;
 499         }
 500       else if (arg->tok == tok_backward)
 501         {
 502           if (rules[cnt] & sort_forward)
 503             {
 504               if (! warned)
 505                 {
 506                   lr_error (ldfile, _("\
 507 %s: `forward' and `backward' are mutually excluding each other"),
 508                             "LC_COLLATE");
 509                   warned = 1;
 510                 }
 511             }
 512           else if (rules[cnt] & sort_backward)
 513             {
 514               if (! warned)
 515                 {
 516                   lr_error (ldfile, _("\
 517 %s: `%s' mentioned more than once in definition of weight %d"),
 518                             "LC_COLLATE", "backward", cnt + 1);
 519                 }
 520             }
 521           else
 522             rules[cnt] |= sort_backward;
 523
 524           valid = 1;
 525         }
 526       else if (arg->tok == tok_position)
 527         {
 528           if (rules[cnt] & sort_position)
 529             {
 530               if (! warned)
 531                 {
 532                   lr_error (ldfile, _("\
 533 %s: `%s' mentioned more than once in definition of weight %d"),
 534                             "LC_COLLATE", "position", cnt + 1);
 535                 }
 536             }
 537           else
 538             rules[cnt] |= sort_position;
 539
 540           valid = 1;
 541         }
 542
 543       if (valid)
 544         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 545
 546       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 547           || arg->tok == tok_semicolon)
 548         {
 549           if (! valid && ! warned)
 550             {
 551               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 552               warned = 1;
 553             }
 554
 555           /* See whether we have to increment the counter.  */
 556           if (arg->tok != tok_comma && rules[cnt] != 0)
 557             {
 558               /* Add the default `forward' if we have seen only `position'.  */
 559               if (rules[cnt] == sort_position)
 560                 rules[cnt] = sort_position | sort_forward;
 561
 562               ++cnt;
 563             }
 564
 565           if (arg->tok == tok_eof || arg->tok == tok_eol)
 566             /* End of line or file, so we exit the loop.  */
 567             break;
 568
 569           if (nrules == 0)
 570             {
 571               /* See whether we have enough room in the array.  */
 572               if (cnt == max)
 573                 {
 574                   max += 10;
 575                   rules = (enum coll_sort_rule *) xrealloc (rules,
 576                                                             max
 577                                                             * sizeof (*rules));
 578                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 579                 }
 580             }
 581           else
 582             {
 583               if (cnt == nrules)
 584                 {
 585                   /* There must not be any more rule.  */
 586                   if (! warned)
 587                     {
 588                       lr_error (ldfile, _("\
 589 %s: too many rules; first entry only had %d"),
 590                                 "LC_COLLATE", nrules);
 591                       warned = 1;
 592                     }
 593
 594                   lr_ignore_rest (ldfile, 0);
 595                   break;
 596                 }
 597             }
 598         }
 599       else
 600         {
 601           if (! warned)
 602             {
 603               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 604               warned = 1;
 605             }
 606         }
 607
 608       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 609     }
 610
 611   if (nrules == 0)
 612     {
 613       /* Now we know how many rules we have.  */
 614       nrules = cnt;
 615       rules = (enum coll_sort_rule *) xrealloc (rules,
 616                                                 nrules * sizeof (*rules));
 617     }
 618   else
 619     {
 620       if (cnt < nrules)
 621         {
 622           /* Not enough rules in this specification.  */
 623           if (! warned)
 624             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 625
 626           do
 627             rules[cnt] = sort_forward;
 628           while (++cnt < nrules);
 629         }
 630     }
 631
 632   collate->current_section->rules = rules;
 633 }
 634
 635
 636 static struct element_t *
 637 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 638               const char *str, size_t len)
 639 {
 640   void *result = NULL;
 641
 642   /* Search for the entries among the collation sequences already define.  */
 643   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 644     {
 645       /* Nope, not define yet.  So we see whether it is a
 646          collation symbol.  */
 647       void *ptr;
 648
 649       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 650         {
 651           /* It's a collation symbol.  */
 652           struct symbol_t *sym = (struct symbol_t *) ptr;
 653           result = sym->order;
 654
 655           if (result == NULL)
 656             result = sym->order = new_element (collate, NULL, 0, NULL,
 657                                                NULL, 0, 0);
 658         }
 659       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 660         {
 661           /* It's also no collation element.  So it is a character
 662              element defined later.  */
 663           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 664           /* Insert it into the sequence table.  */
 665           insert_entry (&collate->seq_table, str, len, result);
 666         }
 667     }
 668
 669   return (struct element_t *) result;
 670 }
 671
 672
 673 static void
 674 unlink_element (struct locale_collate_t *collate)
 675 {
 676   if (collate->cursor == collate->start)
 677     {
 678       assert (collate->cursor->next == NULL);
 679       assert (collate->cursor->last == NULL);
 680       collate->cursor = NULL;
 681     }
 682   else
 683     {
 684       if (collate->cursor->next != NULL)
 685         collate->cursor->next->last = collate->cursor->last;
 686       if (collate->cursor->last != NULL)
 687         collate->cursor->last->next = collate->cursor->next;
 688       collate->cursor = collate->cursor->last;
 689     }
 690 }
 691
 692
 693 static void
 694 insert_weights (struct linereader *ldfile, struct element_t *elem,
 695                 const struct charmap_t *charmap,
 696                 struct repertoire_t *repertoire, struct localedef_t *result,
 697                 enum token_t ellipsis)
 698 {
 699   int weight_cnt;
 700   struct token *arg;
 701   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 702
 703   /* Initialize all the fields.  */
 704   elem->file = ldfile->fname;
 705   elem->line = ldfile->lineno;
 706
 707   elem->last = collate->cursor;
 708   elem->next = collate->cursor ? collate->cursor->next : NULL;
 709   if (collate->cursor != NULL && collate->cursor->next != NULL)
 710     collate->cursor->next->last = elem;
 711   if (collate->cursor != NULL)
 712     collate->cursor->next = elem;
 713   if (collate->start == NULL)
 714     {
 715       assert (collate->cursor == NULL);
 716       collate->start = elem;
 717     }
 718
 719   elem->section = collate->current_section;
 720
 721   if (collate->current_section->first == NULL)
 722     collate->current_section->first = elem;
 723   if (collate->current_section->last == collate->cursor)
 724     collate->current_section->last = elem;
 725
 726   collate->cursor = elem;
 727
 728   elem->weights = (struct element_list_t *)
 729     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 730   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 731
 732   weight_cnt = 0;
 733
 734   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 735   do
 736     {
 737       if (arg->tok == tok_eof || arg->tok == tok_eol)
 738         break;
 739
 740       if (arg->tok == tok_ignore)
 741         {
 742           /* The weight for this level has to be ignored.  We use the
 743              null pointer to indicate this.  */
 744           elem->weights[weight_cnt].w = (struct element_t **)
 745             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 746           elem->weights[weight_cnt].w[0] = NULL;
 747           elem->weights[weight_cnt].cnt = 1;
 748         }
 749       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 750         {
 751           char ucs4str[10];
 752           struct element_t *val;
 753           char *symstr;
 754           size_t symlen;
 755
 756           if (arg->tok == tok_bsymbol)
 757             {
 758               symstr = arg->val.str.startmb;
 759               symlen = arg->val.str.lenmb;
 760             }
 761           else
 762             {
 763               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 764               symstr = ucs4str;
 765               symlen = 9;
 766             }
 767
 768           val = find_element (ldfile, collate, symstr, symlen);
 769           if (val == NULL)
 770             break;
 771
 772           elem->weights[weight_cnt].w = (struct element_t **)
 773             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 774           elem->weights[weight_cnt].w[0] = val;
 775           elem->weights[weight_cnt].cnt = 1;
 776         }
 777       else if (arg->tok == tok_string)
 778         {
 779           /* Split the string up in the individual characters and put
 780              the element definitions in the list.  */
 781           const char *cp = arg->val.str.startmb;
 782           int cnt = 0;
 783           struct element_t *charelem;
 784           struct element_t **weights = NULL;
 785           int max = 0;
 786
 787           if (*cp == '\0')
 788             {
 789               lr_error (ldfile, _("%s: empty weight string not allowed"),
 790                         "LC_COLLATE");
 791               lr_ignore_rest (ldfile, 0);
 792               break;
 793             }
 794
 795           do
 796             {
 797               if (*cp == '<')
 798                 {
 799                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 800                      the latter we have to unify the name.  */
 801                   const char *startp = ++cp;
 802                   size_t len;
 803
 804                   while (*cp != '>')
 805                     {
 806                       if (*cp == ldfile->escape_char)
 807                         ++cp;
 808                       if (*cp == '\0')
 809                         /* It's a syntax error.  */
 810                         goto syntax;
 811
 812                       ++cp;
 813                     }
 814
 815                   if (cp - startp == 5 && startp[0] == 'U'
 816                       && isxdigit (startp[1]) && isxdigit (startp[2])
 817                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 818                     {
 819                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 820                       char *newstr;
 821
 822                       newstr = (char *) xmalloc (10);
 823                       snprintf (newstr, 10, "U%08X", ucs4);
 824                       startp = newstr;
 825
 826                       len = 9;
 827                     }
 828                   else
 829                     len = cp - startp;
 830
 831                   charelem = find_element (ldfile, collate, startp, len);
 832                   ++cp;
 833                 }
 834               else
 835                 {
 836                   /* People really shouldn't use characters directly in
 837                      the string.  Especially since it's not really clear
 838                      what this means.  We interpret all characters in the
 839                      string as if that would be bsymbols.  Otherwise we
 840                      would have to match back to bsymbols somehow and this
 841                      is normally not what people normally expect.  */
 842                   charelem = find_element (ldfile, collate, cp++, 1);
 843                 }
 844
 845               if (charelem == NULL)
 846                 {
 847                   /* We ignore the rest of the line.  */
 848                   lr_ignore_rest (ldfile, 0);
 849                   break;
 850                 }
 851
 852               /* Add the pointer.  */
 853               if (cnt >= max)
 854                 {
 855                   struct element_t **newp;
 856                   max += 10;
 857                   newp = (struct element_t **)
 858                     alloca (max * sizeof (struct element_t *));
 859                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 860                   weights = newp;
 861                 }
 862               weights[cnt++] = charelem;
 863             }
 864           while (*cp != '\0');
 865
 866           /* Now store the information.  */
 867           elem->weights[weight_cnt].w = (struct element_t **)
 868             obstack_alloc (&collate->mempool,
 869                            cnt * sizeof (struct element_t *));
 870           memcpy (elem->weights[weight_cnt].w, weights,
 871                   cnt * sizeof (struct element_t *));
 872           elem->weights[weight_cnt].cnt = cnt;
 873
 874           /* We don't need the string anymore.  */
 875           free (arg->val.str.startmb);
 876         }
 877       else if (ellipsis != tok_none
 878                && (arg->tok == tok_ellipsis2
 879                    || arg->tok == tok_ellipsis3
 880                    || arg->tok == tok_ellipsis4))
 881         {
 882           /* It must be the same ellipsis as used in the initial column.  */
 883           if (arg->tok != ellipsis)
 884             lr_error (ldfile, _("\
 885 %s: weights must use the same ellipsis symbol as the name"),
 886                       "LC_COLLATE");
 887
 888           /* The weight for this level will depend on the element
 889              iterating over the range.  Put a placeholder.  */
 890           elem->weights[weight_cnt].w = (struct element_t **)
 891             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 892           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 893           elem->weights[weight_cnt].cnt = 1;
 894         }
 895       else
 896         {
 897         syntax:
 898           /* It's a syntax error.  */
 899           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 900           lr_ignore_rest (ldfile, 0);
 901           break;
 902         }
 903
 904       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 905       /* This better should be the end of the line or a semicolon.  */
 906       if (arg->tok == tok_semicolon)
 907         /* OK, ignore this and read the next token.  */
 908         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 909       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 910         {
 911           /* It's a syntax error.  */
 912           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 913           lr_ignore_rest (ldfile, 0);
 914           break;
 915         }
 916     }
 917   while (++weight_cnt < nrules);
 918
 919   if (weight_cnt < nrules)
 920     {
 921       /* This means the rest of the line uses the current element as
 922          the weight.  */
 923       do
 924         {
 925           elem->weights[weight_cnt].w = (struct element_t **)
 926             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 927           if (ellipsis == tok_none)
 928             elem->weights[weight_cnt].w[0] = elem;
 929           else
 930             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 931           elem->weights[weight_cnt].cnt = 1;
 932         }
 933       while (++weight_cnt < nrules);
 934     }
 935   else
 936     {
 937       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 938         {
 939           /* Too many rule values.  */
 940           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 941           lr_ignore_rest (ldfile, 0);
 942         }
 943       else
 944         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 945     }
 946 }
 947
 948
 949 static int
 950 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 951               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 952               struct localedef_t *result)
 953 {
 954   /* First find out what kind of symbol this is.  */
 955   struct charseq *seq;
 956   uint32_t wc;
 957   struct element_t *elem = NULL;
 958   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 959
 960   /* Try to find the character in the charmap.  */
 961   seq = charmap_find_value (charmap, symstr, symlen);
 962
 963   /* Determine the wide character.  */
 964   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 965     {
 966       wc = repertoire_find_value (repertoire, symstr, symlen);
 967       if (seq != NULL)
 968         seq->ucs4 = wc;
 969     }
 970   else
 971     wc = seq->ucs4;
 972
 973   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 974     {
 975       /* It's no character, so look through the collation elements and
 976          symbol list.  */
 977       void *ptr = elem;
 978       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 979         {
 980           void *result;
 981           struct symbol_t *sym = NULL;
 982
 983           /* It's also collation element.  Therefore it's either a
 984              collating symbol or it's a character which is not
 985              supported by the character set.  In the later case we
 986              simply create a dummy entry.  */
 987           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 988             {
 989               /* It's a collation symbol.  */
 990               sym = (struct symbol_t *) result;
 991
 992               elem = sym->order;
 993             }
 994
 995           if (elem == NULL)
 996             {
 997               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 998
 999               if (sym != NULL)
1000                 sym->order = elem;
1001               else
1002                 /* Enter a fake element in the sequence table.  This
1003                    won't cause anything in the output since there is
1004                    no multibyte or wide character associated with
1005                    it.  */
1006                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1007             }
1008         }
1009       else
1010         /* Copy the result back.  */
1011         elem = ptr;
1012     }
1013   else
1014     {
1015       /* Otherwise the symbols stands for a character.  */
1016       void *ptr = elem;
1017       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1018         {
1019           uint32_t wcs[2] = { wc, 0 };
1020
1021           /* We have to allocate an entry.  */
1022           elem = new_element (collate,
1023                               seq != NULL ? (char *) seq->bytes : NULL,
1024                               seq != NULL ? seq->nbytes : 0,
1025                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1026                               symstr, symlen, 1);
1027
1028           /* And add it to the table.  */
1029           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1030             /* This cannot happen.  */
1031             assert (! "Internal error");
1032         }
1033       else
1034         {
1035           /* Copy the result back.  */
1036           elem = ptr;
1037
1038           /* Maybe the character was used before the definition.  In this case
1039              we have to insert the byte sequences now.  */
1040           if (elem->mbs == NULL && seq != NULL)
1041             {
1042               elem->mbs = obstack_copy0 (&collate->mempool,
1043                                          seq->bytes, seq->nbytes);
1044               elem->nmbs = seq->nbytes;
1045             }
1046
1047           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1048             {
1049               uint32_t wcs[2] = { wc, 0 };
1050
1051               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1052               elem->nwcs = 1;
1053             }
1054         }
1055     }
1056
1057   /* Test whether this element is not already in the list.  */
1058   if (elem->next != NULL || elem == collate->cursor)
1059     {
1060       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1061                 (int) symlen, symstr, elem->file, elem->line);
1062       lr_ignore_rest (ldfile, 0);
1063       return 1;
1064     }
1065
1066   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1067
1068   return 0;
1069 }
1070
1071
1072 static void
1073 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1074                  enum token_t ellipsis, const struct charmap_t *charmap,
1075                  struct repertoire_t *repertoire,
1076                  struct localedef_t *result)
1077 {
1078   struct element_t *startp;
1079   struct element_t *endp;
1080   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1081
1082   /* Unlink the entry added for the ellipsis.  */
1083   unlink_element (collate);
1084   startp = collate->cursor;
1085
1086   /* Process and add the end-entry.  */
1087   if (symstr != NULL
1088       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1089     /* Something went wrong with inserting the to-value.  This means
1090        we cannot process the ellipsis.  */
1091     return;
1092
1093   /* Reset the cursor.  */
1094   collate->cursor = startp;
1095
1096   /* Now we have to handle many different situations:
1097      - we have to distinguish between the three different ellipsis forms
1098      - the is the ellipsis at the beginning, in the middle, or at the end.
1099   */
1100   endp = collate->cursor->next;
1101   assert (symstr == NULL || endp != NULL);
1102
1103   /* XXX The following is probably very wrong since also collating symbols
1104      can appear in ranges.  But do we want/can refine the test for that?  */
1105 #if 0
1106   /* Both, the start and the end symbol, must stand for characters.  */
1107   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1108       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1109     {
1110       lr_error (ldfile, _("\
1111 %s: the start and the end symbol of a range must stand for characters"),
1112                 "LC_COLLATE");
1113       return;
1114     }
1115 #endif
1116
1117   if (ellipsis == tok_ellipsis3)
1118     {
1119       /* One requirement we make here: the length of the byte
1120          sequences for the first and end character must be the same.
1121          This is mainly to prevent unwanted effects and this is often
1122          not what is wanted.  */
1123       size_t len = (startp->mbs != NULL ? startp->nmbs
1124                     : (endp->mbs != NULL ? endp->nmbs : 0));
1125       char mbcnt[len + 1];
1126       char mbend[len + 1];
1127
1128       /* Well, this should be caught somewhere else already.  Just to
1129          make sure.  */
1130       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1131       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1132
1133       if (startp != NULL && endp != NULL
1134           && startp->mbs != NULL && endp->mbs != NULL
1135           && startp->nmbs != endp->nmbs)
1136         {
1137           lr_error (ldfile, _("\
1138 %s: byte sequences of first and last character must have the same length"),
1139                     "LC_COLLATE");
1140           return;
1141         }
1142
1143       /* Determine whether we have to generate multibyte sequences.  */
1144       if ((startp == NULL || startp->mbs != NULL)
1145           && (endp == NULL || endp->mbs != NULL))
1146         {
1147           int cnt;
1148           int ret;
1149
1150           /* Prepare the beginning byte sequence.  This is either from the
1151              beginning byte sequence or it is all nulls if it was an
1152              initial ellipsis.  */
1153           if (startp == NULL || startp->mbs == NULL)
1154             memset (mbcnt, '\0', len);
1155           else
1156             {
1157               memcpy (mbcnt, startp->mbs, len);
1158
1159               /* And increment it so that the value is the first one we will
1160                  try to insert.  */
1161               for (cnt = len - 1; cnt >= 0; --cnt)
1162                 if (++mbcnt[cnt] != '\0')
1163                   break;
1164             }
1165           mbcnt[len] = '\0';
1166
1167           /* And the end sequence.  */
1168           if (endp == NULL || endp->mbs == NULL)
1169             memset (mbend, '\0', len);
1170           else
1171             memcpy (mbend, endp->mbs, len);
1172           mbend[len] = '\0';
1173
1174           /* Test whether we have a correct range.  */
1175           ret = memcmp (mbcnt, mbend, len);
1176           if (ret >= 0)
1177             {
1178               if (ret > 0)
1179                 lr_error (ldfile, _("%s: byte sequence of first character of \
1180 range is not lower than that of the last character"), "LC_COLLATE");
1181               return;
1182             }
1183
1184           /* Generate the byte sequences data.  */
1185           while (1)
1186             {
1187               struct charseq *seq;
1188
1189               /* Quite a bit of work ahead.  We have to find the character
1190                  definition for the byte sequence and then determine the
1191                  wide character belonging to it.  */
1192               seq = charmap_find_symbol (charmap, mbcnt, len);
1193               if (seq != NULL)
1194                 {
1195                   struct element_t *elem;
1196                   size_t namelen;
1197
1198                   /* I don't think this can ever happen.  */
1199                   assert (seq->name != NULL);
1200                   namelen = strlen (seq->name);
1201
1202                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1203                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1204                                                        namelen);
1205
1206                   /* Now we are ready to insert the new value in the
1207                      sequence.  Find out whether the element is
1208                      already known.  */
1209                   void *ptr;
1210                   if (find_entry (&collate->seq_table, seq->name, namelen,
1211                                   &ptr) != 0)
1212                     {
1213                       uint32_t wcs[2] = { seq->ucs4, 0 };
1214
1215                       /* We have to allocate an entry.  */
1216                       elem = new_element (collate, mbcnt, len,
1217                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1218                                           ? NULL : wcs, seq->name,
1219                                           namelen, 1);
1220
1221                       /* And add it to the table.  */
1222                       if (insert_entry (&collate->seq_table, seq->name,
1223                                         namelen, elem) != 0)
1224                         /* This cannot happen.  */
1225                         assert (! "Internal error");
1226                     }
1227                   else
1228                     /* Copy the result.  */
1229                     elem = ptr;
1230
1231                   /* Test whether this element is not already in the list.  */
1232                   if (elem->next != NULL || (collate->cursor != NULL
1233                                              && elem->next == collate->cursor))
1234                     {
1235                       lr_error (ldfile, _("\
1236 order for `%.*s' already defined at %s:%Zu"),
1237                                 (int) namelen, seq->name,
1238                                 elem->file, elem->line);
1239                       goto increment;
1240                     }
1241
1242                   /* Enqueue the new element.  */
1243                   elem->last = collate->cursor;
1244                   if (collate->cursor == NULL)
1245                     elem->next = NULL;
1246                   else
1247                     {
1248                       elem->next = collate->cursor->next;
1249                       elem->last->next = elem;
1250                       if (elem->next != NULL)
1251                         elem->next->last = elem;
1252                     }
1253                   if (collate->start == NULL)
1254                     {
1255                       assert (collate->cursor == NULL);
1256                       collate->start = elem;
1257                     }
1258                   collate->cursor = elem;
1259
1260                  /* Add the weight value.  We take them from the
1261                     `ellipsis_weights' member of `collate'.  */
1262                   elem->weights = (struct element_list_t *)
1263                     obstack_alloc (&collate->mempool,
1264                                    nrules * sizeof (struct element_list_t));
1265                   for (cnt = 0; cnt < nrules; ++cnt)
1266                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1267                         && (collate->ellipsis_weight.weights[cnt].w[0]
1268                             == ELEMENT_ELLIPSIS2))
1269                       {
1270                         elem->weights[cnt].w = (struct element_t **)
1271                           obstack_alloc (&collate->mempool,
1272                                          sizeof (struct element_t *));
1273                         elem->weights[cnt].w[0] = elem;
1274                         elem->weights[cnt].cnt = 1;
1275                       }
1276                     else
1277                       {
1278                         /* Simply use the weight from `ellipsis_weight'.  */
1279                         elem->weights[cnt].w =
1280                           collate->ellipsis_weight.weights[cnt].w;
1281                         elem->weights[cnt].cnt =
1282                           collate->ellipsis_weight.weights[cnt].cnt;
1283                       }
1284                 }
1285
1286               /* Increment for the next round.  */
1287             increment:
1288               for (cnt = len - 1; cnt >= 0; --cnt)
1289                 if (++mbcnt[cnt] != '\0')
1290                   break;
1291
1292               /* Find out whether this was all.  */
1293               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1294                 /* Yep, that's all.  */
1295                 break;
1296             }
1297         }
1298     }
1299   else
1300     {
1301       /* For symbolic range we naturally must have a beginning and an
1302          end specified by the user.  */
1303       if (startp == NULL)
1304         lr_error (ldfile, _("\
1305 %s: symbolic range ellipsis must not directly follow `order_start'"),
1306                   "LC_COLLATE");
1307       else if (endp == NULL)
1308         lr_error (ldfile, _("\
1309 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1310                   "LC_COLLATE");
1311       else
1312         {
1313           /* Determine the range.  To do so we have to determine the
1314              common prefix of the both names and then the numeric
1315              values of both ends.  */
1316           size_t lenfrom = strlen (startp->name);
1317           size_t lento = strlen (endp->name);
1318           char buf[lento + 1];
1319           int preflen = 0;
1320           long int from;
1321           long int to;
1322           char *cp;
1323           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1324
1325           if (lenfrom != lento)
1326             {
1327             invalid_range:
1328               lr_error (ldfile, _("\
1329 `%s' and `%.*s' are not valid names for symbolic range"),
1330                         startp->name, (int) lento, endp->name);
1331               return;
1332             }
1333
1334           while (startp->name[preflen] == endp->name[preflen])
1335             if (startp->name[preflen] == '\0')
1336               /* Nothing to be done.  The start and end point are identical
1337                  and while inserting the end point we have already given
1338                  the user an error message.  */
1339               return;
1340             else
1341               ++preflen;
1342
1343           errno = 0;
1344           from = strtol (startp->name + preflen, &cp, base);
1345           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1346             goto invalid_range;
1347
1348           errno = 0;
1349           to = strtol (endp->name + preflen, &cp, base);
1350           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1351             goto invalid_range;
1352
1353           /* Copy the prefix.  */
1354           memcpy (buf, startp->name, preflen);
1355
1356           /* Loop over all values.  */
1357           for (++from; from < to; ++from)
1358             {
1359               struct element_t *elem = NULL;
1360               struct charseq *seq;
1361               uint32_t wc;
1362               int cnt;
1363
1364               /* Generate the name.  */
1365               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1366                        (int) (lenfrom - preflen), from);
1367
1368               /* Look whether this name is already defined.  */
1369               void *ptr;
1370               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1371                 {
1372                   /* Copy back the result.  */
1373                   elem = ptr;
1374
1375                   if (elem->next != NULL || (collate->cursor != NULL
1376                                              && elem->next == collate->cursor))
1377                     {
1378                       lr_error (ldfile, _("\
1379 %s: order for `%.*s' already defined at %s:%Zu"),
1380                                 "LC_COLLATE", (int) lenfrom, buf,
1381                                 elem->file, elem->line);
1382                       continue;
1383                     }
1384
1385                   if (elem->name == NULL)
1386                     {
1387                       lr_error (ldfile, _("%s: `%s' must be a character"),
1388                                 "LC_COLLATE", buf);
1389                       continue;
1390                     }
1391                 }
1392
1393               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1394                 {
1395                   /* Search for a character of this name.  */
1396                   seq = charmap_find_value (charmap, buf, lenfrom);
1397                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1398                     {
1399                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1400
1401                       if (seq != NULL)
1402                         seq->ucs4 = wc;
1403                     }
1404                   else
1405                     wc = seq->ucs4;
1406
1407                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1408                     /* We don't know anything about a character with this
1409                        name.  XXX Should we warn?  */
1410                     continue;
1411
1412                   if (elem == NULL)
1413                     {
1414                       uint32_t wcs[2] = { wc, 0 };
1415
1416                       /* We have to allocate an entry.  */
1417                       elem = new_element (collate,
1418                                           seq != NULL
1419                                           ? (char *) seq->bytes : NULL,
1420                                           seq != NULL ? seq->nbytes : 0,
1421                                           wc == ILLEGAL_CHAR_VALUE
1422                                           ? NULL : wcs, buf, lenfrom, 1);
1423                     }
1424                   else
1425                     {
1426                       /* Update the element.  */
1427                       if (seq != NULL)
1428                         {
1429                           elem->mbs = obstack_copy0 (&collate->mempool,
1430                                                      seq->bytes, seq->nbytes);
1431                           elem->nmbs = seq->nbytes;
1432                         }
1433
1434                       if (wc != ILLEGAL_CHAR_VALUE)
1435                         {
1436                           uint32_t zero = 0;
1437
1438                           obstack_grow (&collate->mempool,
1439                                         &wc, sizeof (uint32_t));
1440                           obstack_grow (&collate->mempool,
1441                                         &zero, sizeof (uint32_t));
1442                           elem->wcs = obstack_finish (&collate->mempool);
1443                           elem->nwcs = 1;
1444                         }
1445                     }
1446
1447                   elem->file = ldfile->fname;
1448                   elem->line = ldfile->lineno;
1449                   elem->section = collate->current_section;
1450                 }
1451
1452               /* Enqueue the new element.  */
1453               elem->last = collate->cursor;
1454               elem->next = collate->cursor->next;
1455               elem->last->next = elem;
1456               if (elem->next != NULL)
1457                 elem->next->last = elem;
1458               collate->cursor = elem;
1459
1460               /* Now add the weights.  They come from the `ellipsis_weights'
1461                  member of `collate'.  */
1462               elem->weights = (struct element_list_t *)
1463                 obstack_alloc (&collate->mempool,
1464                                nrules * sizeof (struct element_list_t));
1465               for (cnt = 0; cnt < nrules; ++cnt)
1466                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1467                     && (collate->ellipsis_weight.weights[cnt].w[0]
1468                         == ELEMENT_ELLIPSIS2))
1469                   {
1470                     elem->weights[cnt].w = (struct element_t **)
1471                       obstack_alloc (&collate->mempool,
1472                                      sizeof (struct element_t *));
1473                     elem->weights[cnt].w[0] = elem;
1474                     elem->weights[cnt].cnt = 1;
1475                   }
1476                 else
1477                   {
1478                     /* Simly use the weight from `ellipsis_weight'.  */
1479                     elem->weights[cnt].w =
1480                       collate->ellipsis_weight.weights[cnt].w;
1481                     elem->weights[cnt].cnt =
1482                       collate->ellipsis_weight.weights[cnt].cnt;
1483                   }
1484             }
1485         }
1486     }
1487 }
1488
1489
1490 static void
1491 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1492                  struct localedef_t *copy_locale, int ignore_content)
1493 {
1494   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1495     {
1496       struct locale_collate_t *collate;
1497
1498       if (copy_locale == NULL)
1499         {
1500           collate = locale->categories[LC_COLLATE].collate =
1501             (struct locale_collate_t *)
1502             xcalloc (1, sizeof (struct locale_collate_t));
1503
1504           /* Init the various data structures.  */
1505           init_hash (&collate->elem_table, 100);
1506           init_hash (&collate->sym_table, 100);
1507           init_hash (&collate->seq_table, 500);
1508           obstack_init (&collate->mempool);
1509
1510           collate->col_weight_max = -1;
1511         }
1512       else
1513         /* Reuse the copy_locale's data structures.  */
1514         collate = locale->categories[LC_COLLATE].collate =
1515           copy_locale->categories[LC_COLLATE].collate;
1516     }
1517
1518   ldfile->translate_strings = 0;
1519   ldfile->return_widestr = 0;
1520 }
1521
1522
1523 void
1524 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1525 {
1526   /* Now is the time when we can assign the individual collation
1527      values for all the symbols.  We have possibly different values
1528      for the wide- and the multibyte-character symbols.  This is done
1529      since it might make a difference in the encoding if there is in
1530      some cases no multibyte-character but there are wide-characters.
1531      (The other way around it is not important since theencoded
1532      collation value in the wide-character case is 32 bits wide and
1533      therefore requires no encoding).
1534
1535      The lowest collation value assigned is 2.  Zero is reserved for
1536      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1537      functions and 1 is used to separate the individual passes for the
1538      different rules.
1539
1540      We also have to construct is list with all the bytes/words which
1541      can come first in a sequence, followed by all the elements which
1542      also start with this byte/word.  The order is reverse which has
1543      among others the important effect that longer strings are located
1544      first in the list.  This is required for the output data since
1545      the algorithm used in `strcoll' etc depends on this.
1546
1547      The multibyte case is easy.  We simply sort into an array with
1548      256 elements.  */
1549   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1550   int mbact[nrules];
1551   int wcact;
1552   int mbseqact;
1553   int wcseqact;
1554   struct element_t *runp;
1555   int i;
1556   int need_undefined = 0;
1557   struct section_list *sect;
1558   int ruleidx;
1559   int nr_wide_elems = 0;
1560
1561   if (collate == NULL)
1562     {
1563       /* No data, no check.  */
1564       if (! be_quiet)
1565         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1566                                 "LC_COLLATE"));
1567       return;
1568     }
1569
1570   /* If this assertion is hit change the type in `element_t'.  */
1571   assert (nrules <= sizeof (runp->used_in_level) * 8);
1572
1573   /* Make sure that the `position' rule is used either in all sections
1574      or in none.  */
1575   for (i = 0; i < nrules; ++i)
1576     for (sect = collate->sections; sect != NULL; sect = sect->next)
1577       if (sect != collate->current_section
1578           && sect->rules != NULL
1579           && ((sect->rules[i] & sort_position)
1580               != (collate->current_section->rules[i] & sort_position)))
1581         {
1582           WITH_CUR_LOCALE (error (0, 0, _("\
1583 %s: `position' must be used for a specific level in all sections or none"),
1584                                   "LC_COLLATE"));
1585           break;
1586         }
1587
1588   /* Find out which elements are used at which level.  At the same
1589      time we find out whether we have any undefined symbols.  */
1590   runp = collate->start;
1591   while (runp != NULL)
1592     {
1593       if (runp->mbs != NULL)
1594         {
1595           for (i = 0; i < nrules; ++i)
1596             {
1597               int j;
1598
1599               for (j = 0; j < runp->weights[i].cnt; ++j)
1600                 /* A NULL pointer as the weight means IGNORE.  */
1601                 if (runp->weights[i].w[j] != NULL)
1602                   {
1603                     if (runp->weights[i].w[j]->weights == NULL)
1604                       {
1605                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1606                                                         runp->line,
1607                                                         _("symbol `%s' not defined"),
1608                                                         runp->weights[i].w[j]->name));
1609
1610                         need_undefined = 1;
1611                         runp->weights[i].w[j] = &collate->undefined;
1612                       }
1613                     else
1614                       /* Set the bit for the level.  */
1615                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1616                   }
1617             }
1618         }
1619
1620       /* Up to the next entry.  */
1621       runp = runp->next;
1622     }
1623
1624   /* Walk through the list of defined sequences and assign weights.  Also
1625      create the data structure which will allow generating the single byte
1626      character based tables.
1627
1628      Since at each time only the weights for each of the rules are
1629      only compared to other weights for this rule it is possible to
1630      assign more compact weight values than simply counting all
1631      weights in sequence.  We can assign weights from 3, one for each
1632      rule individually and only for those elements, which are actually
1633      used for this rule.
1634
1635      Why is this important?  It is not for the wide char table.  But
1636      it is for the singlebyte output since here larger numbers have to
1637      be encoded to make it possible to emit the value as a byte
1638      string.  */
1639   for (i = 0; i < nrules; ++i)
1640     mbact[i] = 2;
1641   wcact = 2;
1642   mbseqact = 0;
1643   wcseqact = 0;
1644   runp = collate->start;
1645   while (runp != NULL)
1646     {
1647       /* Determine the order.  */
1648       if (runp->used_in_level != 0)
1649         {
1650           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1651                                                  nrules * sizeof (int));
1652
1653           for (i = 0; i < nrules; ++i)
1654             if ((runp->used_in_level & (1 << i)) != 0)
1655               runp->mborder[i] = mbact[i]++;
1656             else
1657               runp->mborder[i] = 0;
1658         }
1659
1660       if (runp->mbs != NULL)
1661         {
1662           struct element_t **eptr;
1663           struct element_t *lastp = NULL;
1664
1665           /* Find the point where to insert in the list.  */
1666           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1667           while (*eptr != NULL)
1668             {
1669               if ((*eptr)->nmbs < runp->nmbs)
1670                 break;
1671
1672               if ((*eptr)->nmbs == runp->nmbs)
1673                 {
1674                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1675
1676                   if (c == 0)
1677                     {
1678                       /* This should not happen.  It means that we have
1679                          to symbols with the same byte sequence.  It is
1680                          of course an error.  */
1681                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1682                                                       (*eptr)->line,
1683                                                       _("\
1684 symbol `%s' has the same encoding as"), (*eptr)->name);
1685                                        error_at_line (0, 0, runp->file,
1686                                                       runp->line,
1687                                                       _("symbol `%s'"),
1688                                                       runp->name));
1689                       goto dont_insert;
1690                     }
1691                   else if (c < 0)
1692                     /* Insert it here.  */
1693                     break;
1694                 }
1695
1696               /* To the next entry.  */
1697               lastp = *eptr;
1698               eptr = &(*eptr)->mbnext;
1699             }
1700
1701           /* Set the pointers.  */
1702           runp->mbnext = *eptr;
1703           runp->mblast = lastp;
1704           if (*eptr != NULL)
1705             (*eptr)->mblast = runp;
1706           *eptr = runp;
1707         dont_insert:
1708           ;
1709         }
1710
1711       if (runp->used_in_level)
1712         {
1713           runp->wcorder = wcact++;
1714
1715           /* We take the opportunity to count the elements which have
1716              wide characters.  */
1717           ++nr_wide_elems;
1718         }
1719
1720       if (runp->is_character)
1721         {
1722           if (runp->nmbs == 1)
1723             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1724
1725           runp->wcseqorder = wcseqact++;
1726         }
1727       else if (runp->mbs != NULL && runp->weights != NULL)
1728         /* This is for collation elements.  */
1729         runp->wcseqorder = wcseqact++;
1730
1731       /* Up to the next entry.  */
1732       runp = runp->next;
1733     }
1734
1735   /* Find out whether any of the `mbheads' entries is unset.  In this
1736      case we use the UNDEFINED entry.  */
1737   for (i = 1; i < 256; ++i)
1738     if (collate->mbheads[i] == NULL)
1739       {
1740         need_undefined = 1;
1741         collate->mbheads[i] = &collate->undefined;
1742       }
1743
1744   /* Now to the wide character case.  */
1745   collate->wcheads.p = 6;
1746   collate->wcheads.q = 10;
1747   wchead_table_init (&collate->wcheads);
1748
1749   collate->wcseqorder.p = 6;
1750   collate->wcseqorder.q = 10;
1751   collseq_table_init (&collate->wcseqorder);
1752
1753   /* Start adding.  */
1754   runp = collate->start;
1755   while (runp != NULL)
1756     {
1757       if (runp->wcs != NULL)
1758         {
1759           struct element_t *e;
1760           struct element_t **eptr;
1761           struct element_t *lastp;
1762
1763           /* Insert the collation sequence value.  */
1764           if (runp->is_character)
1765             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1766                                runp->wcseqorder);
1767
1768           /* Find the point where to insert in the list.  */
1769           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1770           eptr = &e;
1771           lastp = NULL;
1772           while (*eptr != NULL)
1773             {
1774               if ((*eptr)->nwcs < runp->nwcs)
1775                 break;
1776
1777               if ((*eptr)->nwcs == runp->nwcs)
1778                 {
1779                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1780                                    (wchar_t *) runp->wcs, runp->nwcs);
1781
1782                   if (c == 0)
1783                     {
1784                       /* This should not happen.  It means that we have
1785                          two symbols with the same byte sequence.  It is
1786                          of course an error.  */
1787                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1788                                                       (*eptr)->line,
1789                                                       _("\
1790 symbol `%s' has the same encoding as"), (*eptr)->name);
1791                                        error_at_line (0, 0, runp->file,
1792                                                       runp->line,
1793                                                       _("symbol `%s'"),
1794                                                       runp->name));
1795                       goto dont_insertwc;
1796                     }
1797                   else if (c < 0)
1798                     /* Insert it here.  */
1799                     break;
1800                 }
1801
1802               /* To the next entry.  */
1803               lastp = *eptr;
1804               eptr = &(*eptr)->wcnext;
1805             }
1806
1807           /* Set the pointers.  */
1808           runp->wcnext = *eptr;
1809           runp->wclast = lastp;
1810           if (*eptr != NULL)
1811             (*eptr)->wclast = runp;
1812           *eptr = runp;
1813           if (eptr == &e)
1814             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1815         dont_insertwc:
1816           ;
1817         }
1818
1819       /* Up to the next entry.  */
1820       runp = runp->next;
1821     }
1822
1823   /* Now determine whether the UNDEFINED entry is needed and if yes,
1824      whether it was defined.  */
1825   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1826   if (collate->undefined.file == NULL)
1827     {
1828       if (need_undefined)
1829         {
1830           /* This seems not to be enforced by recent standards.  Don't
1831              emit an error, simply append UNDEFINED at the end.  */
1832           if (0)
1833             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1834
1835           /* Add UNDEFINED at the end.  */
1836           collate->undefined.mborder =
1837             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1838
1839           for (i = 0; i < nrules; ++i)
1840             collate->undefined.mborder[i] = mbact[i]++;
1841         }
1842
1843       /* In any case we will need the definition for the wide character
1844          case.  But we will not complain that it is missing since the
1845          specification strangely enough does not seem to account for
1846          this.  */
1847       collate->undefined.wcorder = wcact++;
1848     }
1849
1850   /* Finally, try to unify the rules for the sections.  Whenever the rules
1851      for a section are the same as those for another section give the
1852      ruleset the same index.  Since there are never many section we can
1853      use an O(n^2) algorithm here.  */
1854   sect = collate->sections;
1855   while (sect != NULL && sect->rules == NULL)
1856     sect = sect->next;
1857
1858   /* Bail out if we have no sections because of earlier errors.  */
1859   if (sect == NULL)
1860     {
1861       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1862                               _("too many errors; giving up")));
1863       return;
1864     }
1865
1866   ruleidx = 0;
1867   do
1868     {
1869       struct section_list *osect = collate->sections;
1870
1871       while (osect != sect)
1872         if (osect->rules != NULL
1873             && memcmp (osect->rules, sect->rules,
1874                        nrules * sizeof (osect->rules[0])) == 0)
1875           break;
1876         else
1877           osect = osect->next;
1878
1879       if (osect == sect)
1880         sect->ruleidx = ruleidx++;
1881       else
1882         sect->ruleidx = osect->ruleidx;
1883
1884       /* Next section.  */
1885       do
1886         sect = sect->next;
1887       while (sect != NULL && sect->rules == NULL);
1888     }
1889   while (sect != NULL);
1890   /* We are currently not prepared for more than 128 rulesets.  But this
1891      should never really be a problem.  */
1892   assert (ruleidx <= 128);
1893 }
1894
1895
/* Append the multibyte weight record of ELEM to POOL and return the
   value which is to be stored as its index.

   For each of the `nrules' levels the record holds one length byte
   followed by the utf8_encode'd `mborder' values of all weights of
   that level which are not IGNORE (i.e., whose entry is not NULL).

   The result is the offset in POOL at which the record starts, with
   the low seven bits of the rule set index of ELEM's section placed
   in bits 24 to 30.  For the UNDEFINED element nothing is appended
   and zero is returned.  */
1896 static int32_t
1897 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1898                struct element_t *elem)
1899 {
1900   size_t cnt;
1901   int32_t retval;
1902
1903   /* Optimize the use of UNDEFINED.  */
1904   if (elem == &collate->undefined)
1905     /* It is always referred to by index zero; nothing is appended.  */
1906     return 0;
1907
1908   /* The record starts at the current end of the object in POOL.  This
1909      offset is the index which is handed back to the caller.  */
1910   retval = obstack_object_size (pool);
1911
1912   /* Construct the weight, one level after the other.  */
1913   for (cnt = 0; cnt < nrules; ++cnt)
1914     {
           /* Seven bytes are reserved per weight.  NOTE(review): presumably
              the longest encoding utf8_encode produces -- confirm.  */
1915       char buf[elem->weights[cnt].cnt * 7];
1916       int len = 0;
1917       int i;
1918
1919       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1920         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1921         if (elem->weights[cnt].w[i] != NULL)
1922           len += utf8_encode (&buf[len],
1923                               elem->weights[cnt].w[i]->mborder[cnt]);
1924
1925       /* And add the buffer content, preceded by its length.
              NOTE(review): LEN is stored in a single byte and nothing
              here checks that it fits.  */
1926       obstack_1grow (pool, len);
1927       obstack_grow (pool, buf, len);
1928     }
1929
1930   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1931 }
1932
1933
/* Append the wide character weight record of ELEM to POOL and return
   the value which is to be stored as its index.

   For each of the `nrules' levels the record holds a 32-bit count
   followed by the `wcorder' values of all weights of that level which
   are not NULL.

   The result is the position, counted in 32-bit words, at which the
   record starts in POOL, with the low seven bits of the rule set index
   of ELEM's section placed in bits 24 to 30.  For the UNDEFINED
   element nothing is appended and zero is returned.  */
1934 static int32_t
1935 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1936                  struct element_t *elem)
1937 {
1938   size_t cnt;
1939   int32_t retval;
1940
1941   /* Optimize the use of UNDEFINED.  */
1942   if (elem == &collate->undefined)
1943     /* It is always referred to by index zero; nothing is appended.  */
1944     return 0;
1945
1946   /* The record starts at the current end of the object in POOL.  The
1947      index handed back to the caller counts 32-bit words, not bytes.  */
1948   retval = obstack_object_size (pool) / sizeof (int32_t);
1949
1950   /* Construct the weight, one level after the other.  */
1951   for (cnt = 0; cnt < nrules; ++cnt)
1952     {
1953       int32_t buf[elem->weights[cnt].cnt];
1954       int i;
1955       int32_t j;
1956
           /* Collect the values of all entries which are not NULL; J counts
              how many there are.  */
1957       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1958         if (elem->weights[cnt].w[i] != NULL)
1959           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1960
1961       /* And add the buffer content, preceded by the number of values.  */
1962       obstack_int32_grow (pool, j);
1963
1964       obstack_grow (pool, buf, j * sizeof (int32_t));
           /* NOTE(review): presumably converts the J words just added to the
              byte order of the output file -- confirm.  */
1965       maybe_swap_uint32_obstack (pool, j);
1966     }
1967
1968   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1969 }
1970
1971 /* Arguments for add_to_tablewc.  That function is handed to
        wchead_table_iterate and receives only a character and an element
        pointer, so everything else it needs is passed in this file-scope
        object.  collate_output fills it in before iterating and clears
        it afterwards.  If localedef is ever threaded, this would need to
        be a __thread variable.  */
1972 static struct
1973 {
1974   struct obstack *weightpool;   /* Passed to output_weightwc as pool.  */
1975   struct obstack *extrapool;    /* Receives the sequence records.  */
1976   struct obstack *indpool;      /* Receives weight indices of runs.  */
1977   struct locale_collate_t *collate;  /* Passed to output_weightwc.  */
1978   struct collidx_table *tablewc;     /* Index table being filled.  */
1979 } atwc;
1980
1981 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1982
/* Add the entry for the wide character CH to `atwc.tablewc'.  RUNP is
   the first element of a list linked through `wcnext'; collate_output
   passes this function to wchead_table_iterate, so presumably RUNP is
   the head of the list of all sequences starting with CH.

   If the list consists of a single one-character element the table
   entry is the index of its weights.  Otherwise the entry is the
   negated position (in 32-bit words) of a series of records in
   `atwc.extrapool', one per list entry or per run of consecutive
   entries:

     single entry:  weight index, length - 1, characters 1 .. length - 1
     run:           negated position in `atwc.indpool', length - 1,
                    characters 1 .. length - 1 of the last list entry of
                    the run, the same characters of its first list entry

   For a run the weight indices of its members are appended to
   `atwc.indpool', starting with the last list entry.  */
1983 static void
1984 add_to_tablewc (uint32_t ch, struct element_t *runp)
1985 {
1986   if (runp->wcnext == NULL && runp->nwcs == 1)
1987     {
1988       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1989                                            runp);
1990       collidx_table_add (atwc.tablewc, ch, weigthidx);
1991     }
1992   else
1993     {
1994       /* As for the singlebyte table, we recognize sequences and
1995          compress them.  The table entry is the negated position of
              the first record, counted in 32-bit words.  */
1996
1997       collidx_table_add (atwc.tablewc, ch,
1998                          -(obstack_object_size (atwc.extrapool)
1999                          / sizeof (uint32_t)));
2000
2001       do
2002         {
2003           /* Index of the current entry in the weight table.  We know that
2004              the current position in the `extrapool' is aligned on a
2005              32-bit address.  */
2006           int32_t weightidx;
2007           int added;
2008
2009           /* Find out whether this is a single entry or we have more than
2010              one consecutive entry.  Two neighbours are consecutive if
                  they have the same length, agree in all but the last
                  character, and the last character of the successor is
                  one less than ours.  */
2011           if (runp->wcnext != NULL
2012               && runp->nwcs == runp->wcnext->nwcs
2013               && wmemcmp ((wchar_t *) runp->wcs,
2014                           (wchar_t *)runp->wcnext->wcs,
2015                           runp->nwcs - 1) == 0
2016               && (runp->wcs[runp->nwcs - 1]
2017                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2018             {
2019               int i;
2020               struct element_t *series_startp = runp;
2021               struct element_t *curp;
2022
2023               /* Room for the index, the length, and two sequences which
                      both lack the first character.  NOTE(review): room is
                      only reserved if int32_t is int, presumably because
                      only then obstack_int32_grow_fast skips the check.  */
2024               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2025               if (sizeof (int32_t) == sizeof (int))
2026                 obstack_make_room (atwc.extrapool, added);
2027
2028               /* More than one consecutive entry.  We mark this by having
2029                  a negative index into the indirect table.  */
2030               obstack_int32_grow_fast (atwc.extrapool,
2031                                        -(obstack_object_size (atwc.indpool)
2032                                          / sizeof (int32_t)));
2033               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2034
                   /* Advance RUNP to the last list entry which belongs to
                      the run.  */
2035               do
2036                 runp = runp->wcnext;
2037               while (runp->wcnext != NULL
2038                      && runp->nwcs == runp->wcnext->nwcs
2039                      && wmemcmp ((wchar_t *) runp->wcs,
2040                                  (wchar_t *)runp->wcnext->wcs,
2041                                  runp->nwcs - 1) == 0
2042                      && (runp->wcs[runp->nwcs - 1]
2043                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2044
2045               /* Now walk backward from here to the beginning.  The
                      sequence of the last list entry is stored first.  */
2046               curp = runp;
2047
2048               for (i = 1; i < runp->nwcs; ++i)
2049                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2050
2051               /* Go back to the entry which started the run and
2052                  add all the indices in the indirect pool.  */
2053               do
2054                 {
2055                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2056                                                curp);
2057                   obstack_int32_grow (atwc.indpool, weightidx);
2058
2059                   curp = curp->wclast;
2060                 }
2061               while (curp != series_startp);
2062
2063               /* Add the weight of the entry which started the run.  */
2064               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2065                                            curp);
2066               obstack_int32_grow (atwc.indpool, weightidx);
2067
2068               /* And add the sequence of that entry.  Without length this
2069                  time.  */
2070               for (i = 1; i < curp->nwcs; ++i)
2071                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2072             }
2073           else
2074             {
2075               /* A single entry.  Simply add the index and the length and
2076                  string (except for the first character which is already
2077                  tested for).  */
2078               int i;
2079
2080               /* Output the weight info.  */
2081               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2082                                            runp);
2083
2084               assert (runp->nwcs > 0);
2085               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2086               if (sizeof (int) == sizeof (int32_t))
2087                 obstack_make_room (atwc.extrapool, added);
2088
2089               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2090               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2091               for (i = 1; i < runp->nwcs; ++i)
2092                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2093             }
2094
2095           /* Next entry.  */
2096           runp = runp->wcnext;
2097         }
2098       while (runp != NULL);
2099     }
2100 }
2101
/* Write the LC_COLLATE data of LOCALE using OUTPUT_PATH.  CHARMAP is
   only used for its code set name, which is stored as the last item.

   If LOCALE has no collation data only the rule count is written,
   followed by empty items (a zero word for the symbol hash size).
   Otherwise the items are added in this order: rule count, rule set
   table, the multibyte index table with its weight, extra and
   indirect pools, three empty items, the wide character index table
   with its three pools, the hash table of collation element names
   (size, table, data), the two collation sequence tables and the
   code set name.  */
2102 void
2103 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2104                 const char *output_path)
2105 {
2106   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2107   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2108   struct locale_file file;
2109   size_t ch;
2110   int32_t tablemb[256];
2111   struct obstack weightpool;
2112   struct obstack extrapool;
2113   struct obstack indirectpool;
2114   struct section_list *sect;
2115   struct collidx_table tablewc;
2116   uint32_t elem_size;
2117   uint32_t *elem_table;
2118   int i;
2119   struct element_t *runp;
2120
2121   init_locale_data (&file, nelems);
2122   add_locale_uint32 (&file, nrules);
2123
2124   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2125   if (collate == NULL)
2126     {
2127       size_t idx;
2128       for (idx = 1; idx < nelems; idx++)
2129         {
2130           /* The words have to be handled specially.  */
2131           if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2132             add_locale_uint32 (&file, 0);
2133           else
2134             add_locale_empty (&file);
2135         }
2136       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2137       return;
2138     }
2139
2140   obstack_init (&weightpool);
2141   obstack_init (&extrapool);
2142   obstack_init (&indirectpool);
2143
2144   /* Since we are using the sign of an integer to mark indirection the
2145      offsets in the arrays we are indirectly referring to must not be
2146      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2147   obstack_int32_grow (&extrapool, 0);
2148   obstack_int32_grow (&indirectpool, 0);
2149
2150   /* Prepare the ruleset table.  The rules of a section are written
          when its index equals the number of rule sets written so far,
          i.e., once for every distinct rule set index.  */
2151   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2152     if (sect->rules != NULL && sect->ruleidx == i)
2153       {
2154         int j;
2155
2156         obstack_make_room (&weightpool, nrules);
2157
2158         for (j = 0; j < nrules; ++j)
2159           obstack_1grow_fast (&weightpool, sect->rules[j]);
2160         ++i;
2161       }
2162   /* And align the output.  */
2163   i = (nrules * i) % LOCFILE_ALIGN;
2164   if (i > 0)
2165     do
2166       obstack_1grow (&weightpool, '\0');
2167     while (++i < LOCFILE_ALIGN);
2168
       /* NOTE(review): the pools are grown again after having been added
          to FILE, which presumably relies on add_locale_raw_obstack
          finishing the current obstack object -- confirm.  */
2169   add_locale_raw_obstack (&file, &weightpool);
2170
2171   /* Generate the 8-bit table.  Walk through the lists of sequences
2172      starting with the same byte and add them one after the other to
2173      the table.  In case we have more than one sequence starting with
2174      the same byte we have to use extra indirection.
2175
2176      First add a record for the NUL byte.  This entry will never be used
2177      so it does not matter.  */
2178   tablemb[0] = 0;
2179
2180   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2181      will probably be used more than once it is good to store the
2182      weights only once.  NOTE(review): output_weight returns zero for
          `collate->undefined' before appending anything, so as written
          this call adds no data.  */
2183   if (collate->undefined.used_in_level != 0)
2184     output_weight (&weightpool, collate, &collate->undefined);
2185
2186   for (ch = 1; ch < 256; ++ch)
2187     if (collate->mbheads[ch]->mbnext == NULL
2188         && collate->mbheads[ch]->nmbs <= 1)
2189       {
2190         tablemb[ch] = output_weight (&weightpool, collate,
2191                                      collate->mbheads[ch]);
2192       }
2193     else
2194       {
2195         /* The entries in the list are sorted by length and then
2196            alphabetically.  This is the order in which we will add the
2197            elements to the collation table.  This allows simply walking
2198            the table in sequence and stopping at the first matching
2199            entry.  Since the longer sequences are coming first in the
2200            list they have the possibility to match first, just as it
2201            has to be.  In the worst case we are walking to the end of
2202            the list where we put, if no singlebyte sequence is defined
2203            in the locale definition, the weights for UNDEFINED.
2204
2205            To reduce the length of the search list we compress them a bit.
2206            This happens by collecting sequences of consecutive byte
2207            sequences in one entry (having a begin and end byte sequence)
2208            and add only one index into the weight table.  We can find the
2209            consecutive entries since they are also consecutive in the list.  */
2210         struct element_t *runp = collate->mbheads[ch];
2211         struct element_t *lastp;
2212
2213         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2214
             /* The negated byte offset of the first record marks the
                indirection.  */
2215         tablemb[ch] = -obstack_object_size (&extrapool);
2216
2217         do
2218           {
2219             /* Index of the current entry in the weight table.  We know that
2220                the current position in the `extrapool' is aligned on a
2221                32-bit address.  */
2222             int32_t weightidx;
2223             int added;
2224
2225             /* Find out whether this is a single entry or we have more than
2226                one consecutive entry.  Two neighbours are consecutive if
                    they have the same length, agree in all but the last
                    byte, and the last byte of the successor is one less
                    than ours.  */
2227             if (runp->mbnext != NULL
2228                 && runp->nmbs == runp->mbnext->nmbs
2229                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2230                 && (runp->mbs[runp->nmbs - 1]
2231                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2232               {
2233                 int i;
2234                 struct element_t *series_startp = runp;
2235                 struct element_t *curp;
2236
2237                 /* Compute how much space we will need: the index, the
                        length byte, and two sequences without first byte.  */
2238                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2239                                           + 2 * (runp->nmbs - 1));
2240                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2241                 obstack_make_room (&extrapool, added);
2242
2243                 /* More than one consecutive entry.  We mark this by having
2244                    a negative index into the indirect table.  */
2245                 obstack_int32_grow_fast (&extrapool,
2246                                          -(obstack_object_size (&indirectpool)
2247                                            / sizeof (int32_t)));
2248
2249                 /* Now search first the end of the series.  */
2250                 do
2251                   runp = runp->mbnext;
2252                 while (runp->mbnext != NULL
2253                        && runp->nmbs == runp->mbnext->nmbs
2254                        && memcmp (runp->mbs, runp->mbnext->mbs,
2255                                   runp->nmbs - 1) == 0
2256                        && (runp->mbs[runp->nmbs - 1]
2257                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2258
2259                 /* Now walk backward from here to the beginning.  The
                        length and the sequence of the last list entry are
                        stored first.  */
2260                 curp = runp;
2261
2262                 assert (runp->nmbs <= 256);
2263                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2264                 for (i = 1; i < curp->nmbs; ++i)
2265                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2266
2267                 /* Go back to the entry which started the series and
2268                    add all the indices in the indirect pool.  */
2269                 do
2270                   {
2271                     weightidx = output_weight (&weightpool, collate, curp);
2272                     obstack_int32_grow (&indirectpool, weightidx);
2273
2274                     curp = curp->mblast;
2275                   }
2276                 while (curp != series_startp);
2277
2278                 /* Add the weight of the entry which started the series.  */
2279                 weightidx = output_weight (&weightpool, collate, curp);
2280                 obstack_int32_grow (&indirectpool, weightidx);
2281
2282                 /* And add the byte sequence of that entry.  Without length
2283                    this time.  */
2284                 for (i = 1; i < curp->nmbs; ++i)
2285                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2286               }
2287             else
2288               {
2289                 /* A single entry.  Simply add the index and the length and
2290                    string (except for the first character which is already
2291                    tested for).  */
2292                 int i;
2293
2294                 /* Output the weight info.  */
2295                 weightidx = output_weight (&weightpool, collate, runp);
2296
2297                 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2298                                           + runp->nmbs - 1);
2299                 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2300                 obstack_make_room (&extrapool, added);
2301
2302                 obstack_int32_grow_fast (&extrapool, weightidx);
2303                 assert (runp->nmbs <= 256);
2304                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2305
2306                 for (i = 1; i < runp->nmbs; ++i)
2307                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2308               }
2309
2310             /* Add alignment bytes if necessary.  */
2311             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2312               obstack_1grow_fast (&extrapool, '\0');
2313
2314             /* Next entry.  Remember the one just handled, the last one
                    is examined after the loop.  */
2315             lastp = runp;
2316             runp = runp->mbnext;
2317           }
2318         while (runp != NULL);
2319
2320         assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2321
2322         /* If the final entry in the list is not a single character we
2323            add an UNDEFINED entry here.  */
2324         if (lastp->nmbs != 1)
2325           {
2326             int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2327             obstack_make_room (&extrapool, added);
2328
2329             obstack_int32_grow_fast (&extrapool, 0);
2330             /* XXX What rule? We just pick the first.  */
2331             obstack_1grow_fast (&extrapool, 0);
2332             /* Length is zero.  */
2333             obstack_1grow_fast (&extrapool, 0);
2334
2335             /* Add alignment bytes if necessary.  */
2336             while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2337               obstack_1grow_fast (&extrapool, '\0');
2338           }
2339       }
2340
2341   /* Add padding to the tables if necessary.  */
2342   while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2343     obstack_1grow (&weightpool, 0);
2344
2345   /* Now add the four tables.  */
2346   add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2347   add_locale_raw_obstack (&file, &weightpool);
2348   add_locale_raw_obstack (&file, &extrapool);
2349   add_locale_raw_obstack (&file, &indirectpool);
2350
2351   /* Now the same for the wide character table.  We need to store some
2352      more information here.  */
2353   add_locale_empty (&file);
2354   add_locale_empty (&file);
2355   add_locale_empty (&file);
2356
2357   /* Since we are using the sign of an integer to mark indirection the
2358      offsets in the arrays we are indirectly referring to must not be
2359      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2360   obstack_int32_grow (&extrapool, 0);
2361   obstack_int32_grow (&indirectpool, 0);
2362
2363   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2364      will probably be used more than once it is good to store the
2365      weights only once.  NOTE(review): output_weightwc returns zero
          for `collate->undefined' before appending anything, so as
          written this call adds no data and the check cannot fail.  */
2366   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2367     abort ();
2368
2369   /* Generate the table.  Walk through the lists of sequences starting
2370      with the same wide character and add them one after the other to
2371      the table.  In case we have more than one sequence starting with
2372      the same wide character we have to use extra indirection.  */
2373   tablewc.p = 6;
2374   tablewc.q = 10;
2375   collidx_table_init (&tablewc);
2376
       /* add_to_tablewc gets its remaining arguments through `atwc'.  */
2377   atwc.weightpool = &weightpool;
2378   atwc.extrapool = &extrapool;
2379   atwc.indpool = &indirectpool;
2380   atwc.collate = collate;
2381   atwc.tablewc = &tablewc;
2382
2383   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2384
2385   memset (&atwc, 0, sizeof (atwc));
2386
2387   /* Now add the four tables.  */
2388   add_locale_collidx_table (&file, &tablewc);
2389   add_locale_raw_obstack (&file, &weightpool);
2390   add_locale_raw_obstack (&file, &extrapool);
2391   add_locale_raw_obstack (&file, &indirectpool);
2392
2393   /* Finally write the table with collation element names out.  It is
2394      a hash table with a simple function which gets the name of the
2395      character as the input.  One character might have many names.  The
2396      value associated with the name is an index into the weight table
2397      where we are then interested in the first-level weight value.
2398
2399      To determine how large the table should be we are counting the
2400      elements we have to put in.  Since we are using internal chaining
2401      using a secondary hash function we have to make the table a bit
2402      larger to avoid extremely long search times.  We can achieve
2403      good results with a 40% larger table than there are entries.  */
2404   elem_size = 0;
2405   runp = collate->start;
2406   while (runp != NULL)
2407     {
2408       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2409         /* Yep, the element really counts.  */
2410         ++elem_size;
2411
2412       runp = runp->next;
2413     }
2414   /* Add 40% and find the next prime number.  */
2415   elem_size = next_prime (elem_size * 1.4);
2416
2417   /* Allocate the table.  Each entry consists of two words: the hash
2418      value and an index in a secondary table which provides the index
2419      into the weight table and the string itself (so that a match can
2420      be determined).  */
2421   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2422                                            elem_size * 2 * sizeof (uint32_t));
2423   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2424
2425   /* Now add the elements.  */
2426   runp = collate->start;
2427   while (runp != NULL)
2428     {
2429       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2430         {
2431           /* Compute the hash value of the name.  */
2432           uint32_t namelen = strlen (runp->name);
2433           uint32_t hash = elem_hash (runp->name, namelen);
2434           size_t idx = hash % elem_size;
2435 #ifndef NDEBUG
2436           size_t start_idx = idx;
2437 #endif
2438
2439           if (elem_table[idx * 2] != 0)
2440             {
2441               /* The spot is already taken.  Try iterating using the value
2442                  from the secondary hashing function.  */
2443               size_t iter = hash % (elem_size - 2) + 1;
2444
2445               do
2446                 {
2447                   idx += iter;
2448                   if (idx >= elem_size)
2449                     idx -= elem_size;
2450                   assert (idx != start_idx);
2451                 }
2452               while (elem_table[idx * 2] != 0);
2453             }
2454           /* This is the spot where we will insert the value.  The second
                  word is the offset of the record in the `extrapool'.  */
2455           elem_table[idx * 2] = hash;
2456           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2457
2458           /* The string itself including length.  */
2459           obstack_1grow (&extrapool, namelen);
2460           obstack_grow (&extrapool, runp->name, namelen);
2461
2462           /* And the multibyte representation.  */
2463           obstack_1grow (&extrapool, runp->nmbs);
2464           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2465
2466           /* And align again to 32 bits.  At most three bytes are needed
                  which the string literal, including its terminating NUL
                  byte, provides.  */
2467           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2468             obstack_grow (&extrapool, "\0\0",
2469                           (sizeof (int32_t)
2470                            - ((1 + namelen + 1 + runp->nmbs)
2471                               % sizeof (int32_t))));
2472
2473           /* Now some 32-bit values: multibyte collation sequence,
2474              wide char string (including length), and wide char
2475              collation sequence.  */
2476           obstack_int32_grow (&extrapool, runp->mbseqorder);
2477
2478           obstack_int32_grow (&extrapool, runp->nwcs);
2479           obstack_grow (&extrapool, runp->wcs,
2480                         runp->nwcs * sizeof (uint32_t));
2481           maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2482
2483           obstack_int32_grow (&extrapool, runp->wcseqorder);
2484         }
2485
2486       runp = runp->next;
2487     }
2488
2489   /* Prepare to write out this data.  */
2490   add_locale_uint32 (&file, elem_size);
2491   add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2492   add_locale_raw_obstack (&file, &extrapool);
2493   add_locale_raw_data (&file, collate->mbseqorder, 256);
2494   add_locale_collseq_table (&file, &collate->wcseqorder);
2495   add_locale_string (&file, charmap->code_set_name);
2496   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2497
2498   obstack_free (&weightpool, NULL);
2499   obstack_free (&extrapool, NULL);
2500   obstack_free (&indirectpool, NULL);
2501 }
2502
2503
2504 static enum token_t
2505 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2506          const struct charmap_t *charmap, int to_endif)
2507 {
2508   while (1)
2509     {
2510       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2511       enum token_t nowtok = now->tok;
2512
2513       if (nowtok == tok_eof || nowtok == tok_end)
2514         return nowtok;
2515
2516       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2517         {
2518           lr_error (ldfile, _("%s: nested conditionals not supported"),
2519                     "LC_COLLATE");
2520           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2521           if (nowtok == tok_eof || nowtok == tok_end)
2522             return nowtok;
2523         }
2524       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2525         {
2526           lr_ignore_rest (ldfile, 1);
2527           return nowtok;
2528         }
2529       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2530         {
2531           /* Do not read the rest of the line.  */
2532           return nowtok;
2533         }
2534       else if (nowtok == tok_else)
2535         {
2536           lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2537         }
2538
2539       lr_ignore_rest (ldfile, 0);
2540     }
2541 }
2542
2543
2544 void
2545 collate_read (struct linereader *ldfile, struct localedef_t *result,
2546               const struct charmap_t *charmap, const char *repertoire_name,
2547               int ignore_content)
2548 {
2549   struct repertoire_t *repertoire = NULL;
2550   struct locale_collate_t *collate;
2551   struct token *now;
2552   struct token *arg = NULL;
2553   enum token_t nowtok;
2554   enum token_t was_ellipsis = tok_none;
2555   struct localedef_t *copy_locale = NULL;
2556   /* Parsing state:
2557      0 - start
2558      1 - between `order-start' and `order-end'
2559      2 - after `order-end'
2560      3 - after `reorder-after', waiting for `reorder-end'
2561      4 - after `reorder-end'
2562      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2563      6 - after `reorder-sections-end'
2564   */
2565   int state = 0;
2566
2567   /* Get the repertoire we have to use.  */
2568   if (repertoire_name != NULL)
2569     repertoire = repertoire_read (repertoire_name);
2570
2571   /* The rest of the line containing `LC_COLLATE' must be free.  */
2572   lr_ignore_rest (ldfile, 1);
2573
2574   while (1)
2575     {
2576       do
2577         {
2578           now = lr_token (ldfile, charmap, result, NULL, verbose);
2579           nowtok = now->tok;
2580         }
2581       while (nowtok == tok_eol);
2582
2583       if (nowtok != tok_define)
2584         break;
2585
2586       if (ignore_content)
2587         lr_ignore_rest (ldfile, 0);
2588       else
2589         {
2590           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2591           if (arg->tok != tok_ident)
2592             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2593           else
2594             {
2595               /* Simply add the new symbol.  */
2596               struct name_list *newsym = xmalloc (sizeof (*newsym)
2597                                                   + arg->val.str.lenmb + 1);
2598               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2599               newsym->str[arg->val.str.lenmb] = '\0';
2600               newsym->next = defined;
2601               defined = newsym;
2602
2603               lr_ignore_rest (ldfile, 1);
2604             }
2605         }
2606     }
2607
2608   if (nowtok == tok_copy)
2609     {
2610       now = lr_token (ldfile, charmap, result, NULL, verbose);
2611       if (now->tok != tok_string)
2612         {
2613           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2614
2615         skip_category:
2616           do
2617             now = lr_token (ldfile, charmap, result, NULL, verbose);
2618           while (now->tok != tok_eof && now->tok != tok_end);
2619
2620           if (now->tok != tok_eof
2621               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2622                   now->tok == tok_eof))
2623             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2624           else if (now->tok != tok_lc_collate)
2625             {
2626               lr_error (ldfile, _("\
2627 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2628               lr_ignore_rest (ldfile, 0);
2629             }
2630           else
2631             lr_ignore_rest (ldfile, 1);
2632
2633           return;
2634         }
2635
2636       if (! ignore_content)
2637         {
2638           /* Get the locale definition.  */
2639           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2640                                      repertoire_name, charmap, NULL);
2641           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2642             {
2643               /* Not yet loaded.  So do it now.  */
2644               if (locfile_read (copy_locale, charmap) != 0)
2645                 goto skip_category;
2646             }
2647
2648           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2649             return;
2650         }
2651
2652       lr_ignore_rest (ldfile, 1);
2653
2654       now = lr_token (ldfile, charmap, result, NULL, verbose);
2655       nowtok = now->tok;
2656     }
2657
2658   /* Prepare the data structures.  */
2659   collate_startup (ldfile, result, copy_locale, ignore_content);
2660   collate = result->categories[LC_COLLATE].collate;
2661
2662   while (1)
2663     {
2664       char ucs4buf[10];
2665       char *symstr;
2666       size_t symlen;
2667
2668       /* Of course we don't proceed beyond the end of file.  */
2669       if (nowtok == tok_eof)
2670         break;
2671
2672       /* Ingore empty lines.  */
2673       if (nowtok == tok_eol)
2674         {
2675           now = lr_token (ldfile, charmap, result, NULL, verbose);
2676           nowtok = now->tok;
2677           continue;
2678         }
2679
2680       switch (nowtok)
2681         {
2682         case tok_copy:
2683           /* Allow copying other locales.  */
2684           now = lr_token (ldfile, charmap, result, NULL, verbose);
2685           if (now->tok != tok_string)
2686             goto err_label;
2687
2688           if (! ignore_content)
2689             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2690                          charmap, result);
2691
2692           lr_ignore_rest (ldfile, 1);
2693           break;
2694
2695         case tok_coll_weight_max:
2696           /* Ignore the rest of the line if we don't need the input of
2697              this line.  */
2698           if (ignore_content)
2699             {
2700               lr_ignore_rest (ldfile, 0);
2701               break;
2702             }
2703
2704           if (state != 0)
2705             goto err_label;
2706
2707           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2708           if (arg->tok != tok_number)
2709             goto err_label;
2710           if (collate->col_weight_max != -1)
2711             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2712                       "LC_COLLATE", "col_weight_max");
2713           else
2714             collate->col_weight_max = arg->val.num;
2715           lr_ignore_rest (ldfile, 1);
2716           break;
2717
2718         case tok_section_symbol:
2719           /* Ignore the rest of the line if we don't need the input of
2720              this line.  */
2721           if (ignore_content)
2722             {
2723               lr_ignore_rest (ldfile, 0);
2724               break;
2725             }
2726
2727           if (state != 0)
2728             goto err_label;
2729
2730           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2731           if (arg->tok != tok_bsymbol)
2732             goto err_label;
2733           else if (!ignore_content)
2734             {
2735               /* Check whether this section is already known.  */
2736               struct section_list *known = collate->sections;
2737               while (known != NULL)
2738                 {
2739                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2740                     break;
2741                   known = known->next;
2742                 }
2743
2744               if (known != NULL)
2745                 {
2746                   lr_error (ldfile,
2747                             _("%s: duplicate declaration of section `%s'"),
2748                             "LC_COLLATE", arg->val.str.startmb);
2749                   free (arg->val.str.startmb);
2750                 }
2751               else
2752                 collate->sections = make_seclist_elem (collate,
2753                                                        arg->val.str.startmb,
2754                                                        collate->sections);
2755
2756               lr_ignore_rest (ldfile, known == NULL);
2757             }
2758           else
2759             {
2760               free (arg->val.str.startmb);
2761               lr_ignore_rest (ldfile, 0);
2762             }
2763           break;
2764
2765         case tok_collating_element:
2766           /* Ignore the rest of the line if we don't need the input of
2767              this line.  */
2768           if (ignore_content)
2769             {
2770               lr_ignore_rest (ldfile, 0);
2771               break;
2772             }
2773
2774           if (state != 0 && state != 2)
2775             goto err_label;
2776
2777           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2778           if (arg->tok != tok_bsymbol)
2779             goto err_label;
2780           else
2781             {
2782               const char *symbol = arg->val.str.startmb;
2783               size_t symbol_len = arg->val.str.lenmb;
2784
2785               /* Next the `from' keyword.  */
2786               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787               if (arg->tok != tok_from)
2788                 {
2789                   free ((char *) symbol);
2790                   goto err_label;
2791                 }
2792
2793               ldfile->return_widestr = 1;
2794               ldfile->translate_strings = 1;
2795
2796               /* Finally the string with the replacement.  */
2797               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2798
2799               ldfile->return_widestr = 0;
2800               ldfile->translate_strings = 0;
2801
2802               if (arg->tok != tok_string)
2803                 goto err_label;
2804
2805               if (!ignore_content && symbol != NULL)
2806                 {
2807                   /* The name is already defined.  */
2808                   if (check_duplicate (ldfile, collate, charmap,
2809                                        repertoire, symbol, symbol_len))
2810                     goto col_elem_free;
2811
2812                   if (arg->val.str.startmb != NULL)
2813                     insert_entry (&collate->elem_table, symbol, symbol_len,
2814                                   new_element (collate,
2815                                                arg->val.str.startmb,
2816                                                arg->val.str.lenmb - 1,
2817                                                arg->val.str.startwc,
2818                                                symbol, symbol_len, 0));
2819                 }
2820               else
2821                 {
2822                 col_elem_free:
2823                   free ((char *) symbol);
2824                   free (arg->val.str.startmb);
2825                   free (arg->val.str.startwc);
2826                 }
2827               lr_ignore_rest (ldfile, 1);
2828             }
2829           break;
2830
2831         case tok_collating_symbol:
2832           /* Ignore the rest of the line if we don't need the input of
2833              this line.  */
2834           if (ignore_content)
2835             {
2836               lr_ignore_rest (ldfile, 0);
2837               break;
2838             }
2839
2840           if (state != 0 && state != 2)
2841             goto err_label;
2842
2843           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2844           if (arg->tok != tok_bsymbol)
2845             goto err_label;
2846           else
2847             {
2848               char *symbol = arg->val.str.startmb;
2849               size_t symbol_len = arg->val.str.lenmb;
2850               char *endsymbol = NULL;
2851               size_t endsymbol_len = 0;
2852               enum token_t ellipsis = tok_none;
2853
2854               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2855               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2856                 {
2857                   ellipsis = arg->tok;
2858
2859                   arg = lr_token (ldfile, charmap, result, repertoire,
2860                                   verbose);
2861                   if (arg->tok != tok_bsymbol)
2862                     {
2863                       free (symbol);
2864                       goto err_label;
2865                     }
2866
2867                   endsymbol = arg->val.str.startmb;
2868                   endsymbol_len = arg->val.str.lenmb;
2869
2870                   lr_ignore_rest (ldfile, 1);
2871                 }
2872               else if (arg->tok != tok_eol)
2873                 {
2874                   free (symbol);
2875                   goto err_label;
2876                 }
2877
2878               if (!ignore_content)
2879                 {
2880                   if (symbol == NULL
2881                       || (ellipsis != tok_none && endsymbol == NULL))
2882                     {
2883                       lr_error (ldfile, _("\
2884 %s: unknown character in collating symbol name"),
2885                                 "LC_COLLATE");
2886                       goto col_sym_free;
2887                     }
2888                   else if (ellipsis == tok_none)
2889                     {
2890                       /* A single symbol, no ellipsis.  */
2891                       if (check_duplicate (ldfile, collate, charmap,
2892                                            repertoire, symbol, symbol_len))
2893                         /* The name is already defined.  */
2894                         goto col_sym_free;
2895
2896                       insert_entry (&collate->sym_table, symbol, symbol_len,
2897                                     new_symbol (collate, symbol, symbol_len));
2898                     }
2899                   else if (symbol_len != endsymbol_len)
2900                     {
2901                     col_sym_inv_range:
2902                       lr_error (ldfile,
2903                                 _("invalid names for character range"));
2904                       goto col_sym_free;
2905                     }
2906                   else
2907                     {
2908                       /* Oh my, we have to handle an ellipsis.  First, as
2909                          usual, determine the common prefix and then
2910                          convert the rest into a range.  */
2911                       size_t prefixlen;
2912                       unsigned long int from;
2913                       unsigned long int to;
2914                       char *endp;
2915
2916                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2917                         if (symbol[prefixlen] != endsymbol[prefixlen])
2918                           break;
2919
2920                       /* Convert the rest into numbers.  */
2921                       symbol[symbol_len] = '\0';
2922                       from = strtoul (&symbol[prefixlen], &endp,
2923                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2924                       if (*endp != '\0')
2925                         goto col_sym_inv_range;
2926
2927                       endsymbol[symbol_len] = '\0';
2928                       to = strtoul (&endsymbol[prefixlen], &endp,
2929                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2930                       if (*endp != '\0')
2931                         goto col_sym_inv_range;
2932
2933                       if (from > to)
2934                         goto col_sym_inv_range;
2935
2936                       /* Now loop over all entries.  */
2937                       while (from <= to)
2938                         {
2939                           char *symbuf;
2940
2941                           symbuf = (char *) obstack_alloc (&collate->mempool,
2942                                                            symbol_len + 1);
2943
2944                           /* Create the name.  */
2945                           sprintf (symbuf,
2946                                    ellipsis == tok_ellipsis2
2947                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2948                                    (int) prefixlen, symbol,
2949                                    (int) (symbol_len - prefixlen), from);
2950
2951                           if (check_duplicate (ldfile, collate, charmap,
2952                                                repertoire, symbuf, symbol_len))
2953                             /* The name is already defined.  */
2954                             goto col_sym_free;
2955
2956                           insert_entry (&collate->sym_table, symbuf,
2957                                         symbol_len,
2958                                         new_symbol (collate, symbuf,
2959                                                     symbol_len));
2960
2961                           /* Increment the counter.  */
2962                           ++from;
2963                         }
2964
2965                       goto col_sym_free;
2966                     }
2967                 }
2968               else
2969                 {
2970                 col_sym_free:
2971                   free (symbol);
2972                   free (endsymbol);
2973                 }
2974             }
2975           break;
2976
2977         case tok_symbol_equivalence:
2978           /* Ignore the rest of the line if we don't need the input of
2979              this line.  */
2980           if (ignore_content)
2981             {
2982               lr_ignore_rest (ldfile, 0);
2983               break;
2984             }
2985
2986           if (state != 0)
2987             goto err_label;
2988
2989           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2990           if (arg->tok != tok_bsymbol)
2991             goto err_label;
2992           else
2993             {
2994               const char *newname = arg->val.str.startmb;
2995               size_t newname_len = arg->val.str.lenmb;
2996               const char *symname;
2997               size_t symname_len;
2998               void *symval;     /* Actually struct symbol_t*  */
2999
3000               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3001               if (arg->tok != tok_bsymbol)
3002                 {
3003                   free ((char *) newname);
3004                   goto err_label;
3005                 }
3006
3007               symname = arg->val.str.startmb;
3008               symname_len = arg->val.str.lenmb;
3009
3010               if (newname == NULL)
3011                 {
3012                   lr_error (ldfile, _("\
3013 %s: unknown character in equivalent definition name"),
3014                             "LC_COLLATE");
3015
3016                 sym_equiv_free:
3017                   free ((char *) newname);
3018                   free ((char *) symname);
3019                   break;
3020                 }
3021               if (symname == NULL)
3022                 {
3023                   lr_error (ldfile, _("\
3024 %s: unknown character in equivalent definition value"),
3025                             "LC_COLLATE");
3026                   goto sym_equiv_free;
3027                 }
3028
3029               /* See whether the symbol name is already defined.  */
3030               if (find_entry (&collate->sym_table, symname, symname_len,
3031                               &symval) != 0)
3032                 {
3033                   lr_error (ldfile, _("\
3034 %s: unknown symbol `%s' in equivalent definition"),
3035                             "LC_COLLATE", symname);
3036                   goto sym_equiv_free;
3037                 }
3038
3039               if (insert_entry (&collate->sym_table,
3040                                 newname, newname_len, symval) < 0)
3041                 {
3042                   lr_error (ldfile, _("\
3043 error while adding equivalent collating symbol"));
3044                   goto sym_equiv_free;
3045                 }
3046
3047               free ((char *) symname);
3048             }
3049           lr_ignore_rest (ldfile, 1);
3050           break;
3051
3052         case tok_script:
3053           /* Ignore the rest of the line if we don't need the input of
3054              this line.  */
3055           if (ignore_content)
3056             {
3057               lr_ignore_rest (ldfile, 0);
3058               break;
3059             }
3060
3061           /* We get told about the scripts we know.  */
3062           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3063           if (arg->tok != tok_bsymbol)
3064             goto err_label;
3065           else
3066             {
3067               struct section_list *runp = collate->known_sections;
3068               char *name;
3069
3070               while (runp != NULL)
3071                 if (strncmp (runp->name, arg->val.str.startmb,
3072                              arg->val.str.lenmb) == 0
3073                     && runp->name[arg->val.str.lenmb] == '\0')
3074                   break;
3075                 else
3076                   runp = runp->def_next;
3077
3078               if (runp != NULL)
3079                 {
3080                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3081                             runp->name);
3082                   lr_ignore_rest (ldfile, 0);
3083                   break;
3084                 }
3085
3086               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3087               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3088               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3089               name[arg->val.str.lenmb] = '\0';
3090               runp->name = name;
3091
3092               runp->def_next = collate->known_sections;
3093               collate->known_sections = runp;
3094             }
3095           lr_ignore_rest (ldfile, 1);
3096           break;
3097
3098         case tok_order_start:
3099           /* Ignore the rest of the line if we don't need the input of
3100              this line.  */
3101           if (ignore_content)
3102             {
3103               lr_ignore_rest (ldfile, 0);
3104               break;
3105             }
3106
3107           if (state != 0 && state != 1 && state != 2)
3108             goto err_label;
3109           state = 1;
3110
3111           /* The 14652 draft does not specify whether all `order_start' lines
3112              must contain the same number of sort-rules, but 14651 does.  So
3113              we require this here as well.  */
3114           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3115           if (arg->tok == tok_bsymbol)
3116             {
3117               /* This better should be a section name.  */
3118               struct section_list *sp = collate->known_sections;
3119               while (sp != NULL
3120                      && (sp->name == NULL
3121                          || strncmp (sp->name, arg->val.str.startmb,
3122                                      arg->val.str.lenmb) != 0
3123                          || sp->name[arg->val.str.lenmb] != '\0'))
3124                 sp = sp->def_next;
3125
3126               if (sp == NULL)
3127                 {
3128                   lr_error (ldfile, _("\
3129 %s: unknown section name `%.*s'"),
3130                             "LC_COLLATE", (int) arg->val.str.lenmb,
3131                             arg->val.str.startmb);
3132                   /* We use the error section.  */
3133                   collate->current_section = &collate->error_section;
3134
3135                   if (collate->error_section.first == NULL)
3136                     {
3137                       /* Insert &collate->error_section at the end of
3138                          the collate->sections list.  */
3139                       if (collate->sections == NULL)
3140                         collate->sections = &collate->error_section;
3141                       else
3142                         {
3143                           sp = collate->sections;
3144                           while (sp->next != NULL)
3145                             sp = sp->next;
3146
3147                           sp->next = &collate->error_section;
3148                         }
3149                       collate->error_section.next = NULL;
3150                     }
3151                 }
3152               else
3153                 {
3154                   /* One should not be allowed to open the same
3155                      section twice.  */
3156                   if (sp->first != NULL)
3157                     lr_error (ldfile, _("\
3158 %s: multiple order definitions for section `%s'"),
3159                               "LC_COLLATE", sp->name);
3160                   else
3161                     {
3162                       /* Insert sp in the collate->sections list,
3163                          right after collate->current_section.  */
3164                       if (collate->current_section != NULL)
3165                         {
3166                           sp->next = collate->current_section->next;
3167                           collate->current_section->next = sp;
3168                         }
3169                       else if (collate->sections == NULL)
3170                         /* This is the first section to be defined.  */
3171                         collate->sections = sp;
3172
3173                       collate->current_section = sp;
3174                     }
3175
3176                   /* Next should come the end of the line or a semicolon.  */
3177                   arg = lr_token (ldfile, charmap, result, repertoire,
3178                                   verbose);
3179                   if (arg->tok == tok_eol)
3180                     {
3181                       uint32_t cnt;
3182
3183                       /* This means we have exactly one rule: `forward'.  */
3184                       if (nrules > 1)
3185                         lr_error (ldfile, _("\
3186 %s: invalid number of sorting rules"),
3187                                   "LC_COLLATE");
3188                       else
3189                         nrules = 1;
3190                       sp->rules = obstack_alloc (&collate->mempool,
3191                                                  (sizeof (enum coll_sort_rule)
3192                                                   * nrules));
3193                       for (cnt = 0; cnt < nrules; ++cnt)
3194                         sp->rules[cnt] = sort_forward;
3195
3196                       /* Next line.  */
3197                       break;
3198                     }
3199
3200                   /* Get the next token.  */
3201                   arg = lr_token (ldfile, charmap, result, repertoire,
3202                                   verbose);
3203                 }
3204             }
3205           else
3206             {
3207               /* There is no section symbol.  Therefore we use the unnamed
3208                  section.  */
3209               collate->current_section = &collate->unnamed_section;
3210
3211               if (collate->unnamed_section_defined)
3212                 lr_error (ldfile, _("\
3213 %s: multiple order definitions for unnamed section"),
3214                           "LC_COLLATE");
3215               else
3216                 {
3217                   /* Insert &collate->unnamed_section at the beginning of
3218                      the collate->sections list.  */
3219                   collate->unnamed_section.next = collate->sections;
3220                   collate->sections = &collate->unnamed_section;
3221                   collate->unnamed_section_defined = true;
3222                 }
3223             }
3224
3225           /* Now read the direction names.  */
3226           read_directions (ldfile, arg, charmap, repertoire, result);
3227
3228           /* From now we need the strings untranslated.  */
3229           ldfile->translate_strings = 0;
3230           break;
3231
3232         case tok_order_end:
3233           /* Ignore the rest of the line if we don't need the input of
3234              this line.  */
3235           if (ignore_content)
3236             {
3237               lr_ignore_rest (ldfile, 0);
3238               break;
3239             }
3240
3241           if (state != 1)
3242             goto err_label;
3243
3244           /* Handle ellipsis at end of list.  */
3245           if (was_ellipsis != tok_none)
3246             {
3247               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3248                                repertoire, result);
3249               was_ellipsis = tok_none;
3250             }
3251
3252           state = 2;
3253           lr_ignore_rest (ldfile, 1);
3254           break;
3255
3256         case tok_reorder_after:
3257           /* Ignore the rest of the line if we don't need the input of
3258              this line.  */
3259           if (ignore_content)
3260             {
3261               lr_ignore_rest (ldfile, 0);
3262               break;
3263             }
3264
3265           if (state == 1)
3266             {
3267               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3268                         "LC_COLLATE");
3269               state = 2;
3270
3271               /* Handle ellipsis at end of list.  */
3272               if (was_ellipsis != tok_none)
3273                 {
3274                   handle_ellipsis (ldfile, arg->val.str.startmb,
3275                                    arg->val.str.lenmb, was_ellipsis, charmap,
3276                                    repertoire, result);
3277                   was_ellipsis = tok_none;
3278                 }
3279             }
3280           else if (state == 0 && copy_locale == NULL)
3281             goto err_label;
3282           else if (state != 0 && state != 2 && state != 3)
3283             goto err_label;
3284           state = 3;
3285
3286           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3287           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3288             {
3289               /* Find this symbol in the sequence table.  */
3290               char ucsbuf[10];
3291               char *startmb;
3292               size_t lenmb;
3293               struct element_t *insp;
3294               int no_error = 1;
3295               void *ptr;
3296
3297               if (arg->tok == tok_bsymbol)
3298                 {
3299                   startmb = arg->val.str.startmb;
3300                   lenmb = arg->val.str.lenmb;
3301                 }
3302               else
3303                 {
3304                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3305                   startmb = ucsbuf;
3306                   lenmb = 9;
3307                 }
3308
3309               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3310                 /* Yes, the symbol exists.  Simply point the cursor
3311                    to it.  */
3312                 collate->cursor = (struct element_t *) ptr;
3313               else
3314                 {
3315                   struct symbol_t *symbp;
3316                   void *ptr;
3317
3318                   if (find_entry (&collate->sym_table, startmb, lenmb,
3319                                   &ptr) == 0)
3320                     {
3321                       symbp = ptr;
3322
3323                       if (symbp->order->last != NULL
3324                           || symbp->order->next != NULL)
3325                         collate->cursor = symbp->order;
3326                       else
3327                         {
3328                           /* This is a collating symbol but its position
3329                              is not yet defined.  */
3330                           lr_error (ldfile, _("\
3331 %s: order for collating symbol %.*s not yet defined"),
3332                                     "LC_COLLATE", (int) lenmb, startmb);
3333                           collate->cursor = NULL;
3334                           no_error = 0;
3335                         }
3336                     }
3337                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3338                                        &ptr) == 0)
3339                     {
3340                       insp = (struct element_t *) ptr;
3341
3342                       if (insp->last != NULL || insp->next != NULL)
3343                         collate->cursor = insp;
3344                       else
3345                         {
3346                           /* This is a collating element but its position
3347                              is not yet defined.  */
3348                           lr_error (ldfile, _("\
3349 %s: order for collating element %.*s not yet defined"),
3350                                     "LC_COLLATE", (int) lenmb, startmb);
3351                           collate->cursor = NULL;
3352                           no_error = 0;
3353                         }
3354                     }
3355                   else
3356                     {
3357                       /* This is bad.  The symbol after which we have to
3358                          insert does not exist.  */
3359                       lr_error (ldfile, _("\
3360 %s: cannot reorder after %.*s: symbol not known"),
3361                                 "LC_COLLATE", (int) lenmb, startmb);
3362                       collate->cursor = NULL;
3363                       no_error = 0;
3364                     }
3365                 }
3366
3367               lr_ignore_rest (ldfile, no_error);
3368             }
3369           else
3370             /* This must not happen.  */
3371             goto err_label;
3372           break;
3373
3374         case tok_reorder_end:
3375           /* Ignore the rest of the line if we don't need the input of
3376              this line.  */
3377           if (ignore_content)
3378             break;
3379
3380           if (state != 3)
3381             goto err_label;
3382           state = 4;
3383           lr_ignore_rest (ldfile, 1);
3384           break;
3385
3386         case tok_reorder_sections_after:
3387           /* Ignore the rest of the line if we don't need the input of
3388              this line.  */
3389           if (ignore_content)
3390             {
3391               lr_ignore_rest (ldfile, 0);
3392               break;
3393             }
3394
3395           if (state == 1)
3396             {
3397               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3398                         "LC_COLLATE");
3399               state = 2;
3400
3401               /* Handle ellipsis at end of list.  */
3402               if (was_ellipsis != tok_none)
3403                 {
3404                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3405                                    repertoire, result);
3406                   was_ellipsis = tok_none;
3407                 }
3408             }
3409           else if (state == 3)
3410             {
3411               WITH_CUR_LOCALE (error (0, 0, _("\
3412 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3413               state = 4;
3414             }
3415           else if (state != 2 && state != 4)
3416             goto err_label;
3417           state = 5;
3418
3419           /* Get the name of the sections we are adding after.  */
3420           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3421           if (arg->tok == tok_bsymbol)
3422             {
3423               /* Now find a section with this name.  */
3424               struct section_list *runp = collate->sections;
3425
3426               while (runp != NULL)
3427                 {
3428                   if (runp->name != NULL
3429                       && strlen (runp->name) == arg->val.str.lenmb
3430                       && memcmp (runp->name, arg->val.str.startmb,
3431                                  arg->val.str.lenmb) == 0)
3432                     break;
3433
3434                   runp = runp->next;
3435                 }
3436
3437               if (runp != NULL)
3438                 collate->current_section = runp;
3439               else
3440                 {
3441                   /* This is bad.  The section after which we have to
3442                      reorder does not exist.  Therefore we cannot
3443                      process the whole rest of this reorder
3444                      specification.  */
3445                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3446                             "LC_COLLATE", (int) arg->val.str.lenmb,
3447                             arg->val.str.startmb);
3448
3449                   do
3450                     {
3451                       lr_ignore_rest (ldfile, 0);
3452
3453                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3454                     }
3455                   while (now->tok == tok_reorder_sections_after
3456                          || now->tok == tok_reorder_sections_end
3457                          || now->tok == tok_end);
3458
3459                   /* Process the token we just saw.  */
3460                   nowtok = now->tok;
3461                   continue;
3462                 }
3463             }
3464           else
3465             /* This must not happen.  */
3466             goto err_label;
3467           break;
3468
3469         case tok_reorder_sections_end:
3470           /* Ignore the rest of the line if we don't need the input of
3471              this line.  */
3472           if (ignore_content)
3473             break;
3474
3475           if (state != 5)
3476             goto err_label;
3477           state = 6;
3478           lr_ignore_rest (ldfile, 1);
3479           break;
3480
3481         case tok_bsymbol:
3482         case tok_ucs4:
3483           /* Ignore the rest of the line if we don't need the input of
3484              this line.  */
3485           if (ignore_content)
3486             {
3487               lr_ignore_rest (ldfile, 0);
3488               break;
3489             }
3490
3491           if (state != 0 && state != 1 && state != 3 && state != 5)
3492             goto err_label;
3493
3494           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3495             goto err_label;
3496
3497           if (nowtok == tok_ucs4)
3498             {
3499               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3500               symstr = ucs4buf;
3501               symlen = 9;
3502             }
3503           else if (arg != NULL)
3504             {
3505               symstr = arg->val.str.startmb;
3506               symlen = arg->val.str.lenmb;
3507             }
3508           else
3509             {
3510               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3511                         (int) ldfile->token.val.str.lenmb,
3512                         ldfile->token.val.str.startmb);
3513               break;
3514             }
3515
3516           struct element_t *seqp;
3517           if (state == 0)
3518             {
3519               /* We are outside an `order_start' region.  This means
3520                  we must only accept definitions of values for
3521                  collation symbols since these are purely abstract
3522                  values and don't need directions associated.  */
3523               void *ptr;
3524
3525               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3526                 {
3527                   seqp = ptr;
3528
3529                   /* It's already defined.  First check whether this
3530                      is really a collating symbol.  */
3531                   if (seqp->is_character)
3532                     goto err_label;
3533
3534                   goto move_entry;
3535                 }
3536               else
3537                 {
3538                   void *result;
3539
3540                   if (find_entry (&collate->sym_table, symstr, symlen,
3541                                   &result) != 0)
3542                     /* No collating symbol, it's an error.  */
3543                     goto err_label;
3544
3545                   /* Maybe this is the first time we define a symbol
3546                      value and it is before the first actual section.  */
3547                   if (collate->sections == NULL)
3548                     collate->sections = collate->current_section =
3549                       &collate->symbol_section;
3550                 }
3551
3552               if (was_ellipsis != tok_none)
3553                 {
3554                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3555                                    charmap, repertoire, result);
3556
3557                   /* Remember that we processed the ellipsis.  */
3558                   was_ellipsis = tok_none;
3559
3560                   /* And don't add the value a second time.  */
3561                   break;
3562                 }
3563             }
3564           else if (state == 3)
3565             {
3566               /* It is possible that we already have this collation sequence.
3567                  In this case we move the entry.  */
3568               void *sym;
3569               void *ptr;
3570
3571               /* If the symbol after which we have to insert was not found
3572                  ignore all entries.  */
3573               if (collate->cursor == NULL)
3574                 {
3575                   lr_ignore_rest (ldfile, 0);
3576                   break;
3577                 }
3578
3579               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3580                 {
3581                   seqp = (struct element_t *) ptr;
3582                   goto move_entry;
3583                 }
3584
3585               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3586                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3587                 goto move_entry;
3588
3589               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3590                   && (seqp = (struct element_t *) ptr,
3591                       seqp->last != NULL || seqp->next != NULL
3592                       || (collate->start != NULL && seqp == collate->start)))
3593                 {
3594                 move_entry:
3595                   /* Remove the entry from the old position.  */
3596                   if (seqp->last == NULL)
3597                     collate->start = seqp->next;
3598                   else
3599                     seqp->last->next = seqp->next;
3600                   if (seqp->next != NULL)
3601                     seqp->next->last = seqp->last;
3602
3603                   /* We also have to check whether this entry is the
3604                      first or last of a section.  */
3605                   if (seqp->section->first == seqp)
3606                     {
3607                       if (seqp->section->first == seqp->section->last)
3608                         /* This section has no content anymore.  */
3609                         seqp->section->first = seqp->section->last = NULL;
3610                       else
3611                         seqp->section->first = seqp->next;
3612                     }
3613                   else if (seqp->section->last == seqp)
3614                     seqp->section->last = seqp->last;
3615
3616                   /* Now insert it in the new place.  */
3617                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3618                                   tok_none);
3619                   break;
3620                 }
3621
3622               /* Otherwise we just add a new entry.  */
3623             }
3624           else if (state == 5)
3625             {
3626               /* We are reordering sections.  Find the named section.  */
3627               struct section_list *runp = collate->sections;
3628               struct section_list *prevp = NULL;
3629
3630               while (runp != NULL)
3631                 {
3632                   if (runp->name != NULL
3633                       && strlen (runp->name) == symlen
3634                       && memcmp (runp->name, symstr, symlen) == 0)
3635                     break;
3636
3637                   prevp = runp;
3638                   runp = runp->next;
3639                 }
3640
3641               if (runp == NULL)
3642                 {
3643                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3644                             "LC_COLLATE", (int) symlen, symstr);
3645                   lr_ignore_rest (ldfile, 0);
3646                 }
3647               else
3648                 {
3649                   if (runp != collate->current_section)
3650                     {
3651                       /* Remove the named section from the old place and
3652                          insert it in the new one.  */
3653                       prevp->next = runp->next;
3654
3655                       runp->next = collate->current_section->next;
3656                       collate->current_section->next = runp;
3657                       collate->current_section = runp;
3658                     }
3659
3660                   /* Process the rest of the line which might change
3661                      the collation rules.  */
3662                   arg = lr_token (ldfile, charmap, result, repertoire,
3663                                   verbose);
3664                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3665                     read_directions (ldfile, arg, charmap, repertoire,
3666                                      result);
3667                 }
3668               break;
3669             }
3670           else if (was_ellipsis != tok_none)
3671             {
3672               /* Using the information in the `ellipsis_weight'
3673                  element and this and the last value we have to handle
3674                  the ellipsis now.  */
3675               assert (state == 1);
3676
3677               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3678                                repertoire, result);
3679
3680               /* Remember that we processed the ellipsis.  */
3681               was_ellipsis = tok_none;
3682
3683               /* And don't add the value a second time.  */
3684               break;
3685             }
3686
3687           /* Now insert in the new place.  */
3688           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3689           break;
3690
3691         case tok_undefined:
3692           /* Ignore the rest of the line if we don't need the input of
3693              this line.  */
3694           if (ignore_content)
3695             {
3696               lr_ignore_rest (ldfile, 0);
3697               break;
3698             }
3699
3700           if (state != 1)
3701             goto err_label;
3702
3703           if (was_ellipsis != tok_none)
3704             {
3705               lr_error (ldfile,
3706                         _("%s: cannot have `%s' as end of ellipsis range"),
3707                         "LC_COLLATE", "UNDEFINED");
3708
3709               unlink_element (collate);
3710               was_ellipsis = tok_none;
3711             }
3712
3713           /* See whether UNDEFINED already appeared somewhere.  */
3714           if (collate->undefined.next != NULL
3715               || &collate->undefined == collate->cursor)
3716             {
3717               lr_error (ldfile,
3718                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3719                         "LC_COLLATE", 9, "UNDEFINED",
3720                         collate->undefined.file,
3721                         collate->undefined.line);
3722               lr_ignore_rest (ldfile, 0);
3723             }
3724           else
3725             /* Parse the weights.  */
3726              insert_weights (ldfile, &collate->undefined, charmap,
3727                              repertoire, result, tok_none);
3728           break;
3729
3730         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3731         case tok_ellipsis3: /* absolute ellipsis */
3732         case tok_ellipsis4: /* symbolic decimal ellipsis */
3733           /* This is the symbolic (decimal or hexadecimal) or absolute
3734              ellipsis.  */
3735           if (was_ellipsis != tok_none)
3736             goto err_label;
3737
3738           if (state != 0 && state != 1 && state != 3)
3739             goto err_label;
3740
3741           was_ellipsis = nowtok;
3742
3743           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3744                           repertoire, result, nowtok);
3745           break;
3746
3747         case tok_end:
3748         seen_end:
3749           /* Next we assume `LC_COLLATE'.  */
3750           if (!ignore_content)
3751             {
3752               if (state == 0 && copy_locale == NULL)
3753                 /* We must either see a copy statement or have
3754                    ordering values.  */
3755                 lr_error (ldfile,
3756                           _("%s: empty category description not allowed"),
3757                           "LC_COLLATE");
3758               else if (state == 1)
3759                 {
3760                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3761                             "LC_COLLATE");
3762
3763                   /* Handle ellipsis at end of list.  */
3764                   if (was_ellipsis != tok_none)
3765                     {
3766                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3767                                        repertoire, result);
3768                       was_ellipsis = tok_none;
3769                     }
3770                 }
3771               else if (state == 3)
3772                 WITH_CUR_LOCALE (error (0, 0, _("\
3773 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3774               else if (state == 5)
3775                 WITH_CUR_LOCALE (error (0, 0, _("\
3776 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3777             }
3778           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3779           if (arg->tok == tok_eof)
3780             break;
3781           if (arg->tok == tok_eol)
3782             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3783           else if (arg->tok != tok_lc_collate)
3784             lr_error (ldfile, _("\
3785 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3786           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3787           return;
3788
3789         case tok_define:
3790           if (ignore_content)
3791             {
3792               lr_ignore_rest (ldfile, 0);
3793               break;
3794             }
3795
3796           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3797           if (arg->tok != tok_ident)
3798             goto err_label;
3799
3800           /* Simply add the new symbol.  */
3801           struct name_list *newsym = xmalloc (sizeof (*newsym)
3802                                               + arg->val.str.lenmb + 1);
3803           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3804           newsym->str[arg->val.str.lenmb] = '\0';
3805           newsym->next = defined;
3806           defined = newsym;
3807
3808           lr_ignore_rest (ldfile, 1);
3809           break;
3810
3811         case tok_undef:
3812           if (ignore_content)
3813             {
3814               lr_ignore_rest (ldfile, 0);
3815               break;
3816             }
3817
3818           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3819           if (arg->tok != tok_ident)
3820             goto err_label;
3821
3822           /* Remove _all_ occurrences of the symbol from the list.  */
3823           struct name_list *prevdef = NULL;
3824           struct name_list *curdef = defined;
3825           while (curdef != NULL)
3826             if (strncmp (arg->val.str.startmb, curdef->str,
3827                          arg->val.str.lenmb) == 0
3828                 && curdef->str[arg->val.str.lenmb] == '\0')
3829               {
3830                 if (prevdef == NULL)
3831                   defined = curdef->next;
3832                 else
3833                   prevdef->next = curdef->next;
3834
3835                 struct name_list *olddef = curdef;
3836                 curdef = curdef->next;
3837
3838                 free (olddef);
3839               }
3840             else
3841               {
3842                 prevdef = curdef;
3843                 curdef = curdef->next;
3844               }
3845
3846           lr_ignore_rest (ldfile, 1);
3847           break;
3848
3849         case tok_ifdef:
3850         case tok_ifndef:
3851           if (ignore_content)
3852             {
3853               lr_ignore_rest (ldfile, 0);
3854               break;
3855             }
3856
3857         found_ifdef:
3858           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3859           if (arg->tok != tok_ident)
3860             goto err_label;
3861           lr_ignore_rest (ldfile, 1);
3862
3863           if (collate->else_action == else_none)
3864             {
3865               curdef = defined;
3866               while (curdef != NULL)
3867                 if (strncmp (arg->val.str.startmb, curdef->str,
3868                              arg->val.str.lenmb) == 0
3869                     && curdef->str[arg->val.str.lenmb] == '\0')
3870                   break;
3871                 else
3872                   curdef = curdef->next;
3873
3874               if ((nowtok == tok_ifdef && curdef != NULL)
3875                   || (nowtok == tok_ifndef && curdef == NULL))
3876                 {
3877                   /* We have to use the if-branch.  */
3878                   collate->else_action = else_ignore;
3879                 }
3880               else
3881                 {
3882                   /* We have to use the else-branch, if there is one.  */
3883                   nowtok = skip_to (ldfile, collate, charmap, 0);
3884                   if (nowtok == tok_else)
3885                     collate->else_action = else_seen;
3886                   else if (nowtok == tok_elifdef)
3887                     {
3888                       nowtok = tok_ifdef;
3889                       goto found_ifdef;
3890                     }
3891                   else if (nowtok == tok_elifndef)
3892                     {
3893                       nowtok = tok_ifndef;
3894                       goto found_ifdef;
3895                     }
3896                   else if (nowtok == tok_eof)
3897                     goto seen_eof;
3898                   else if (nowtok == tok_end)
3899                     goto seen_end;
3900                 }
3901             }
3902           else
3903             {
3904               /* XXX Should it really become necessary to support nested
3905                  preprocessor handling we will push the state here.  */
3906               lr_error (ldfile, _("%s: nested conditionals not supported"),
3907                         "LC_COLLATE");
3908               nowtok = skip_to (ldfile, collate, charmap, 1);
3909               if (nowtok == tok_eof)
3910                 goto seen_eof;
3911               else if (nowtok == tok_end)
3912                 goto seen_end;
3913             }
3914           break;
3915
3916         case tok_elifdef:
3917         case tok_elifndef:
3918         case tok_else:
3919           if (ignore_content)
3920             {
3921               lr_ignore_rest (ldfile, 0);
3922               break;
3923             }
3924
3925           lr_ignore_rest (ldfile, 1);
3926
3927           if (collate->else_action == else_ignore)
3928             {
3929               /* Ignore everything until the endif.  */
3930               nowtok = skip_to (ldfile, collate, charmap, 1);
3931               if (nowtok == tok_eof)
3932                 goto seen_eof;
3933               else if (nowtok == tok_end)
3934                 goto seen_end;
3935             }
3936           else
3937             {
3938               assert (collate->else_action == else_none);
3939               lr_error (ldfile, _("\
3940 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3941                         nowtok == tok_else ? "else"
3942                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3943             }
3944           break;
3945
3946         case tok_endif:
3947           if (ignore_content)
3948             {
3949               lr_ignore_rest (ldfile, 0);
3950               break;
3951             }
3952
3953           lr_ignore_rest (ldfile, 1);
3954
3955           if (collate->else_action != else_ignore
3956               && collate->else_action != else_seen)
3957             lr_error (ldfile, _("\
3958 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3959
3960           /* XXX If we support nested preprocessor directives we pop
3961              the state here.  */
3962           collate->else_action = else_none;
3963           break;
3964
3965         default:
3966         err_label:
3967           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3968         }
3969
3970       /* Prepare for the next round.  */
3971       now = lr_token (ldfile, charmap, result, NULL, verbose);
3972       nowtok = now->tok;
3973     }
3974
3975  seen_eof:
3976   /* When we come here we reached the end of the file.  */
3977   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3978 }