locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
#include <errno.h>
#include <error.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <sys/param.h>
  29
  30 #include "localedef.h"
  31 #include "charmap.h"
  32 #include "localeinfo.h"
  33 #include "linereader.h"
  34 #include "locfile.h"
  35 #include "elem-hash.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
  41 #define obstack_chunk_alloc malloc
  42 #define obstack_chunk_free free
  43
  44 static inline void
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  55 {
  56   if (sizeof (int32_t) == sizeof (int))
  57     obstack_int_grow_fast (obstack, data);
  58   else
  59     obstack_grow (obstack, &data, sizeof (int32_t));
  60 }
  61
  62 /* Forward declaration.  */
  63 struct element_t;
  64
/* Description of one section of the collation definition.  Every node
   can be a member of two singly linked lists at the same time: the
   list of known sections (through DEF_NEXT) and the list of sections
   actually used (through NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  The array is installed by
     read_directions.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
  83
  84 struct element_t;
  85
/* The weight of an element on one level: a counted array of pointers
   to the elements which make up that weight.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  An entry may be NULL (the level is
     ignored) or one of the ELEMENT_ELLIPSIS* placeholder values.  */
  struct element_t **w;
};
  93
/* Data type for collating element.  */
struct element_t
{
  /* Symbolic name of the element, or NULL if it was created without
     one.  */
  const char *name;

  /* Multibyte representation and its length in bytes (NULL and 0 if
     there is none).  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character representation and its length in
     characters (NULL and 0 if there is none).  */
  const uint32_t *wcs;
  size_t nwcs;
  /* NOTE(review): presumably the positions assigned to this element in
     the multibyte and wide character output; new_element only clears
     them -- confirm against the code which fills them in.  */
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the corresponding level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array with one weight list per sorting rule; NULL until
     insert_weights allocates it.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
 143
/* Special element values.  These small integers are cast to element
   pointers and stored in a weight list in place of a real element to
   mark a weight which depends on an ellipsis; they must never be
   dereferenced.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 148
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  This is NULL until an element
     has been created for the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
 161
/* The template header "3level.h" is included three times below, each
   time parameterized through the TABLE, ELEMENT and DEFAULT macros.
   NOTE(review): the macros are redefined without an intervening #undef,
   so the header presumably undefines them itself -- confirm in
   3level.h.  */

/* Sparse table of struct element_t *.  Provides struct wchead_table.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_FINALIZE
#include "3level.h"

/* Sparse table of int32_t.  Provides collidx_table.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  Provides struct collseq_table; the default
   value has all bits set.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 181
 182
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Section currently being defined.  New elements are assigned to
     it.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  New elements
     are linked into the order list right behind it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Pool from which elements, symbols, sections and their strings are
     allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contain information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the element lists for the wide
     character output.  NOTE(review): the original comment here was a
     verbatim copy of the one for mbheads; presumably the table is keyed
     by the leading wide character -- confirm.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;
};
 243
 244
/* Global state shared by all LC_COLLATE category descriptions in all
   files: the number of sorting rules (weight levels) per element.  The
   value is zero until the first direction specification has been read;
   that specification fixes it and every later one is checked against
   it.  */
static uint32_t nrules;
 248
 249
 250 /* We need UTF-8 encoding of numbers.  */
 251 static inline int
 252 utf8_encode (char *buf, int val)
 253 {
 254   int retval;
 255
 256   if (val < 0x80)
 257     {
 258       *buf++ = (char) val;
 259       retval = 1;
 260     }
 261   else
 262     {
 263       int step;
 264
 265       for (step = 2; step < 6; ++step)
 266         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 267           break;
 268       retval = step;
 269
 270       *buf = (unsigned char) (~0xff >> step);
 271       --step;
 272       do
 273         {
 274           buf[step] = 0x80 | (val & 0x3f);
 275           val >>= 6;
 276         }
 277       while (--step > 0);
 278       *buf |= val;
 279     }
 280
 281   return retval;
 282 }
 283
 284
 285 static struct section_list *
 286 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 287                    struct section_list *next)
 288 {
 289   struct section_list *newp;
 290
 291   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 292                                                 sizeof (*newp));
 293   newp->next = next;
 294   newp->name = string;
 295   newp->first = NULL;
 296   newp->last = NULL;
 297
 298   return newp;
 299 }
 300
 301
 302 static struct element_t *
 303 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 304              const uint32_t *wcs, const char *name, size_t namelen,
 305              int is_character)
 306 {
 307   struct element_t *newp;
 308
 309   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 310                                              sizeof (*newp));
 311   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 312                                                     name, namelen);
 313   if (mbs != NULL)
 314     {
 315       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 316       newp->nmbs = mbslen;
 317     }
 318   else
 319     {
 320       newp->mbs = NULL;
 321       newp->nmbs = 0;
 322     }
 323   if (wcs != NULL)
 324     {
 325       size_t nwcs = wcslen ((wchar_t *) wcs);
 326       uint32_t zero = 0;
 327       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 328       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 329       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 330       newp->nwcs = nwcs;
 331     }
 332   else
 333     {
 334       newp->wcs = NULL;
 335       newp->nwcs = 0;
 336     }
 337   newp->mborder = NULL;
 338   newp->wcorder = 0;
 339   newp->used_in_level = 0;
 340   newp->is_character = is_character;
 341
 342   /* Will be assigned later.  XXX  */
 343   newp->mbseqorder = 0;
 344   newp->wcseqorder = 0;
 345
 346   /* Will be allocated later.  */
 347   newp->weights = NULL;
 348
 349   newp->file = NULL;
 350   newp->line = 0;
 351
 352   newp->section = collate->current_section;
 353
 354   newp->last = NULL;
 355   newp->next = NULL;
 356
 357   newp->mbnext = NULL;
 358   newp->mblast = NULL;
 359
 360   newp->wcnext = NULL;
 361   newp->wclast = NULL;
 362
 363   return newp;
 364 }
 365
 366
 367 static struct symbol_t *
 368 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 369 {
 370   struct symbol_t *newp;
 371
 372   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 373
 374   newp->name = obstack_copy0 (&collate->mempool, name, len);
 375   newp->order = NULL;
 376
 377   newp->file = NULL;
 378   newp->line = 0;
 379
 380   return newp;
 381 }
 382
 383
 384 /* Test whether this name is already defined somewhere.  */
 385 static int
 386 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 387                  const struct charmap_t *charmap,
 388                  struct repertoire_t *repertoire, const char *symbol,
 389                  size_t symbol_len)
 390 {
 391   void *ignore = NULL;
 392
 393   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 394     {
 395       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 396                 (int) symbol_len, symbol);
 397       return 1;
 398     }
 399
 400   if (repertoire != NULL
 401       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 402           == 0))
 403     {
 404       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 405                 (int) symbol_len, symbol);
 406       return 1;
 407     }
 408
 409   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 410     {
 411       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 412                 (int) symbol_len, symbol);
 413       return 1;
 414     }
 415
 416   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 417     {
 418       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 419                 (int) symbol_len, symbol);
 420       return 1;
 421     }
 422
 423   return 0;
 424 }
 425
 426
 427 /* Read the direction specification.  */
 428 static void
 429 read_directions (struct linereader *ldfile, struct token *arg,
 430                  const struct charmap_t *charmap,
 431                  struct repertoire_t *repertoire, struct localedef_t *result)
 432 {
 433   int cnt = 0;
 434   int max = nrules ?: 10;
 435   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 436   int warned = 0;
 437   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 438
 439   while (1)
 440     {
 441       int valid = 0;
 442
 443       if (arg->tok == tok_forward)
 444         {
 445           if (rules[cnt] & sort_backward)
 446             {
 447               if (! warned)
 448                 {
 449                   lr_error (ldfile, _("\
 450 %s: `forward' and `backward' are mutually excluding each other"),
 451                             "LC_COLLATE");
 452                   warned = 1;
 453                 }
 454             }
 455           else if (rules[cnt] & sort_forward)
 456             {
 457               if (! warned)
 458                 {
 459                   lr_error (ldfile, _("\
 460 %s: `%s' mentioned more than once in definition of weight %d"),
 461                             "LC_COLLATE", "forward", cnt + 1);
 462                 }
 463             }
 464           else
 465             rules[cnt] |= sort_forward;
 466
 467           valid = 1;
 468         }
 469       else if (arg->tok == tok_backward)
 470         {
 471           if (rules[cnt] & sort_forward)
 472             {
 473               if (! warned)
 474                 {
 475                   lr_error (ldfile, _("\
 476 %s: `forward' and `backward' are mutually excluding each other"),
 477                             "LC_COLLATE");
 478                   warned = 1;
 479                 }
 480             }
 481           else if (rules[cnt] & sort_backward)
 482             {
 483               if (! warned)
 484                 {
 485                   lr_error (ldfile, _("\
 486 %s: `%s' mentioned more than once in definition of weight %d"),
 487                             "LC_COLLATE", "backward", cnt + 1);
 488                 }
 489             }
 490           else
 491             rules[cnt] |= sort_backward;
 492
 493           valid = 1;
 494         }
 495       else if (arg->tok == tok_position)
 496         {
 497           if (rules[cnt] & sort_position)
 498             {
 499               if (! warned)
 500                 {
 501                   lr_error (ldfile, _("\
 502 %s: `%s' mentioned more than once in definition of weight %d"),
 503                             "LC_COLLATE", "position", cnt + 1);
 504                 }
 505             }
 506           else
 507             rules[cnt] |= sort_position;
 508
 509           valid = 1;
 510         }
 511
 512       if (valid)
 513         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 514
 515       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 516           || arg->tok == tok_semicolon)
 517         {
 518           if (! valid && ! warned)
 519             {
 520               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 521               warned = 1;
 522             }
 523
 524           /* See whether we have to increment the counter.  */
 525           if (arg->tok != tok_comma && rules[cnt] != 0)
 526             {
 527               /* Add the default `forward' if we have seen only `position'.  */
 528               if (rules[cnt] == sort_position)
 529                 rules[cnt] = sort_position | sort_forward;
 530
 531               ++cnt;
 532             }
 533
 534           if (arg->tok == tok_eof || arg->tok == tok_eol)
 535             /* End of line or file, so we exit the loop.  */
 536             break;
 537
 538           if (nrules == 0)
 539             {
 540               /* See whether we have enough room in the array.  */
 541               if (cnt == max)
 542                 {
 543                   max += 10;
 544                   rules = (enum coll_sort_rule *) xrealloc (rules,
 545                                                             max
 546                                                             * sizeof (*rules));
 547                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 548                 }
 549             }
 550           else
 551             {
 552               if (cnt == nrules)
 553                 {
 554                   /* There must not be any more rule.  */
 555                   if (! warned)
 556                     {
 557                       lr_error (ldfile, _("\
 558 %s: too many rules; first entry only had %d"),
 559                                 "LC_COLLATE", nrules);
 560                       warned = 1;
 561                     }
 562
 563                   lr_ignore_rest (ldfile, 0);
 564                   break;
 565                 }
 566             }
 567         }
 568       else
 569         {
 570           if (! warned)
 571             {
 572               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 573               warned = 1;
 574             }
 575         }
 576
 577       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 578     }
 579
 580   if (nrules == 0)
 581     {
 582       /* Now we know how many rules we have.  */
 583       nrules = cnt;
 584       rules = (enum coll_sort_rule *) xrealloc (rules,
 585                                                 nrules * sizeof (*rules));
 586     }
 587   else
 588     {
 589       if (cnt < nrules)
 590         {
 591           /* Not enough rules in this specification.  */
 592           if (! warned)
 593             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 594
 595           do
 596             rules[cnt] = sort_forward;
 597           while (++cnt < nrules);
 598         }
 599     }
 600
 601   collate->current_section->rules = rules;
 602 }
 603
 604
 605 static struct element_t *
 606 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 607               const char *str, size_t len)
 608 {
 609   struct element_t *result = NULL;
 610
 611   /* Search for the entries among the collation sequences already define.  */
 612   if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
 613     {
 614       /* Nope, not define yet.  So we see whether it is a
 615          collation symbol.  */
 616       void *ptr;
 617
 618       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 619         {
 620           /* It's a collation symbol.  */
 621           struct symbol_t *sym = (struct symbol_t *) ptr;
 622           result = sym->order;
 623
 624           if (result == NULL)
 625             result = sym->order = new_element (collate, NULL, 0, NULL,
 626                                                NULL, 0, 0);
 627         }
 628       else if (find_entry (&collate->elem_table, str, len,
 629                            (void **) &result) != 0)
 630         {
 631           /* It's also no collation element.  So it is a character
 632              element defined later.  */
 633           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 634           /* Insert it into the sequence table.  */
 635           insert_entry (&collate->seq_table, str, len, result);
 636         }
 637     }
 638
 639   return result;
 640 }
 641
 642
 643 static void
 644 unlink_element (struct locale_collate_t *collate)
 645 {
 646   if (collate->cursor == collate->start)
 647     {
 648       assert (collate->cursor->next == NULL);
 649       assert (collate->cursor->last == NULL);
 650       collate->cursor = NULL;
 651     }
 652   else
 653     {
 654       if (collate->cursor->next != NULL)
 655         collate->cursor->next->last = collate->cursor->last;
 656       if (collate->cursor->last != NULL)
 657         collate->cursor->last->next = collate->cursor->next;
 658       collate->cursor = collate->cursor->last;
 659     }
 660 }
 661
 662
 663 static void
 664 insert_weights (struct linereader *ldfile, struct element_t *elem,
 665                 const struct charmap_t *charmap,
 666                 struct repertoire_t *repertoire, struct localedef_t *result,
 667                 enum token_t ellipsis)
 668 {
 669   int weight_cnt;
 670   struct token *arg;
 671   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 672
 673   /* Initialize all the fields.  */
 674   elem->file = ldfile->fname;
 675   elem->line = ldfile->lineno;
 676
 677   elem->last = collate->cursor;
 678   elem->next = collate->cursor ? collate->cursor->next : NULL;
 679   if (collate->cursor != NULL && collate->cursor->next != NULL)
 680     collate->cursor->next->last = elem;
 681   if (collate->cursor != NULL)
 682     collate->cursor->next = elem;
 683   if (collate->start == NULL)
 684     {
 685       assert (collate->cursor == NULL);
 686       collate->start = elem;
 687     }
 688
 689   elem->section = collate->current_section;
 690
 691   if (collate->current_section->first == NULL)
 692     collate->current_section->first = elem;
 693   if (collate->current_section->last == collate->cursor)
 694     collate->current_section->last = elem;
 695
 696   collate->cursor = elem;
 697
 698   elem->weights = (struct element_list_t *)
 699     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 700   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 701
 702   weight_cnt = 0;
 703
 704   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 705   do
 706     {
 707       if (arg->tok == tok_eof || arg->tok == tok_eol)
 708         break;
 709
 710       if (arg->tok == tok_ignore)
 711         {
 712           /* The weight for this level has to be ignored.  We use the
 713              null pointer to indicate this.  */
 714           elem->weights[weight_cnt].w = (struct element_t **)
 715             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 716           elem->weights[weight_cnt].w[0] = NULL;
 717           elem->weights[weight_cnt].cnt = 1;
 718         }
 719       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 720         {
 721           char ucs4str[10];
 722           struct element_t *val;
 723           char *symstr;
 724           size_t symlen;
 725
 726           if (arg->tok == tok_bsymbol)
 727             {
 728               symstr = arg->val.str.startmb;
 729               symlen = arg->val.str.lenmb;
 730             }
 731           else
 732             {
 733               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 734               symstr = ucs4str;
 735               symlen = 9;
 736             }
 737
 738           val = find_element (ldfile, collate, symstr, symlen);
 739           if (val == NULL)
 740             break;
 741
 742           elem->weights[weight_cnt].w = (struct element_t **)
 743             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 744           elem->weights[weight_cnt].w[0] = val;
 745           elem->weights[weight_cnt].cnt = 1;
 746         }
 747       else if (arg->tok == tok_string)
 748         {
 749           /* Split the string up in the individual characters and put
 750              the element definitions in the list.  */
 751           const char *cp = arg->val.str.startmb;
 752           int cnt = 0;
 753           struct element_t *charelem;
 754           struct element_t **weights = NULL;
 755           int max = 0;
 756
 757           if (*cp == '\0')
 758             {
 759               lr_error (ldfile, _("%s: empty weight string not allowed"),
 760                         "LC_COLLATE");
 761               lr_ignore_rest (ldfile, 0);
 762               break;
 763             }
 764
 765           do
 766             {
 767               if (*cp == '<')
 768                 {
 769                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 770                      the latter we have to unify the name.  */
 771                   const char *startp = ++cp;
 772                   size_t len;
 773
 774                   while (*cp != '>')
 775                     {
 776                       if (*cp == ldfile->escape_char)
 777                         ++cp;
 778                       if (*cp == '\0')
 779                         /* It's a syntax error.  */
 780                         goto syntax;
 781
 782                       ++cp;
 783                     }
 784
 785                   if (cp - startp == 5 && startp[0] == 'U'
 786                       && isxdigit (startp[1]) && isxdigit (startp[2])
 787                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 788                     {
 789                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 790                       char *newstr;
 791
 792                       newstr = (char *) xmalloc (10);
 793                       snprintf (newstr, 10, "U%08X", ucs4);
 794                       startp = newstr;
 795
 796                       len = 9;
 797                     }
 798                   else
 799                     len = cp - startp;
 800
 801                   charelem = find_element (ldfile, collate, startp, len);
 802                   ++cp;
 803                 }
 804               else
 805                 {
 806                   /* People really shouldn't use characters directly in
 807                      the string.  Especially since it's not really clear
 808                      what this means.  We interpret all characters in the
 809                      string as if that would be bsymbols.  Otherwise we
 810                      would have to match back to bsymbols somehow and this
 811                      is normally not what people normally expect.  */
 812                   charelem = find_element (ldfile, collate, cp++, 1);
 813                 }
 814
 815               if (charelem == NULL)
 816                 {
 817                   /* We ignore the rest of the line.  */
 818                   lr_ignore_rest (ldfile, 0);
 819                   break;
 820                 }
 821
 822               /* Add the pointer.  */
 823               if (cnt >= max)
 824                 {
 825                   struct element_t **newp;
 826                   max += 10;
 827                   newp = (struct element_t **)
 828                     alloca (max * sizeof (struct element_t *));
 829                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 830                   weights = newp;
 831                 }
 832               weights[cnt++] = charelem;
 833             }
 834           while (*cp != '\0');
 835
 836           /* Now store the information.  */
 837           elem->weights[weight_cnt].w = (struct element_t **)
 838             obstack_alloc (&collate->mempool,
 839                            cnt * sizeof (struct element_t *));
 840           memcpy (elem->weights[weight_cnt].w, weights,
 841                   cnt * sizeof (struct element_t *));
 842           elem->weights[weight_cnt].cnt = cnt;
 843
 844           /* We don't need the string anymore.  */
 845           free (arg->val.str.startmb);
 846         }
 847       else if (ellipsis != tok_none
 848                && (arg->tok == tok_ellipsis2
 849                    || arg->tok == tok_ellipsis3
 850                    || arg->tok == tok_ellipsis4))
 851         {
 852           /* It must be the same ellipsis as used in the initial column.  */
 853           if (arg->tok != ellipsis)
 854             lr_error (ldfile, _("\
 855 %s: weights must use the same ellipsis symbol as the name"),
 856                       "LC_COLLATE");
 857
 858           /* The weight for this level will depend on the element
 859              iterating over the range.  Put a placeholder.  */
 860           elem->weights[weight_cnt].w = (struct element_t **)
 861             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 862           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 863           elem->weights[weight_cnt].cnt = 1;
 864         }
 865       else
 866         {
 867         syntax:
 868           /* It's a syntax error.  */
 869           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 870           lr_ignore_rest (ldfile, 0);
 871           break;
 872         }
 873
 874       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 875       /* This better should be the end of the line or a semicolon.  */
 876       if (arg->tok == tok_semicolon)
 877         /* OK, ignore this and read the next token.  */
 878         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 879       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 880         {
 881           /* It's a syntax error.  */
 882           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 883           lr_ignore_rest (ldfile, 0);
 884           break;
 885         }
 886     }
 887   while (++weight_cnt < nrules);
 888
 889   if (weight_cnt < nrules)
 890     {
 891       /* This means the rest of the line uses the current element as
 892          the weight.  */
 893       do
 894         {
 895           elem->weights[weight_cnt].w = (struct element_t **)
 896             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 897           if (ellipsis == tok_none)
 898             elem->weights[weight_cnt].w[0] = elem;
 899           else
 900             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 901           elem->weights[weight_cnt].cnt = 1;
 902         }
 903       while (++weight_cnt < nrules);
 904     }
 905   else
 906     {
 907       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 908         {
 909           /* Too many rule values.  */
 910           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 911           lr_ignore_rest (ldfile, 0);
 912         }
 913       else
 914         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 915     }
 916 }
 917
 918
 919 static int
 920 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 921               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 922               struct localedef_t *result)
 923 {
 924   /* First find out what kind of symbol this is.  */
 925   struct charseq *seq;
 926   uint32_t wc;
 927   struct element_t *elem = NULL;
 928   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 929
 930   /* Try to find the character in the charmap.  */
 931   seq = charmap_find_value (charmap, symstr, symlen);
 932
 933   /* Determine the wide character.  */
 934   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 935     {
 936       wc = repertoire_find_value (repertoire, symstr, symlen);
 937       if (seq != NULL)
 938         seq->ucs4 = wc;
 939     }
 940   else
 941     wc = seq->ucs4;
 942
 943   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 944     {
 945       /* It's no character, so look through the collation elements and
 946          symbol list.  */
 947       if (find_entry (&collate->elem_table, symstr, symlen,
 948                            (void **) &elem) != 0)
 949         {
 950           void *result;
 951           struct symbol_t *sym = NULL;
 952
 953           /* It's also collation element.  Therefore it's either a
 954              collating symbol or it's a character which is not
 955              supported by the character set.  In the later case we
 956              simply create a dummy entry.  */
 957           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 958             {
 959               /* It's a collation symbol.  */
 960               sym = (struct symbol_t *) result;
 961
 962               elem = sym->order;
 963             }
 964
 965           if (elem == NULL)
 966             {
 967               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 968
 969               if (sym != NULL)
 970                 sym->order = elem;
 971               else
 972                 /* Enter a fake element in the sequence table.  This
 973                    won't cause anything in the output since there is
 974                    no multibyte or wide character associated with
 975                    it.  */
 976                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 977             }
 978         }
 979     }
 980   else
 981     {
 982       /* Otherwise the symbols stands for a character.  */
 983       if (find_entry (&collate->seq_table, symstr, symlen,
 984                       (void **) &elem) != 0)
 985         {
 986           uint32_t wcs[2] = { wc, 0 };
 987
 988           /* We have to allocate an entry.  */
 989           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 990                               seq != NULL ? seq->nbytes : 0,
 991                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 992                               symstr, symlen, 1);
 993
 994           /* And add it to the table.  */
 995           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
 996             /* This cannot happen.  */
 997             assert (! "Internal error");
 998         }
 999       else
1000         {
1001           /* Maybe the character was used before the definition.  In this case
1002              we have to insert the byte sequences now.  */
1003           if (elem->mbs == NULL && seq != NULL)
1004             {
1005               elem->mbs = obstack_copy0 (&collate->mempool,
1006                                          seq->bytes, seq->nbytes);
1007               elem->nmbs = seq->nbytes;
1008             }
1009
1010           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1011             {
1012               uint32_t wcs[2] = { wc, 0 };
1013
1014               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1015               elem->nwcs = 1;
1016             }
1017         }
1018     }
1019
1020   /* Test whether this element is not already in the list.  */
1021   if (elem->next != NULL || elem == collate->cursor)
1022     {
1023       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1024                 (int) symlen, symstr, elem->file, elem->line);
1025       lr_ignore_rest (ldfile, 0);
1026       return 1;
1027     }
1028
1029   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1030
1031   return 0;
1032 }
1033
1034
1035 static void
1036 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1037                  enum token_t ellipsis, const struct charmap_t *charmap,
1038                  struct repertoire_t *repertoire,
1039                  struct localedef_t *result)
1040 {
1041   struct element_t *startp;
1042   struct element_t *endp;
1043   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1044
1045   /* Unlink the entry added for the ellipsis.  */
1046   unlink_element (collate);
1047   startp = collate->cursor;
1048
1049   /* Process and add the end-entry.  */
1050   if (symstr != NULL
1051       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1052     /* Something went wrong with inserting the to-value.  This means
1053        we cannot process the ellipsis.  */
1054     return;
1055
1056   /* Reset the cursor.  */
1057   collate->cursor = startp;
1058
1059   /* Now we have to handle many different situations:
1060      - we have to distinguish between the three different ellipsis forms
1061      - the is the ellipsis at the beginning, in the middle, or at the end.
1062   */
1063   endp = collate->cursor->next;
1064   assert (symstr == NULL || endp != NULL);
1065
1066   /* XXX The following is probably very wrong since also collating symbols
1067      can appear in ranges.  But do we want/can refine the test for that?  */
1068 #if 0
1069   /* Both, the start and the end symbol, must stand for characters.  */
1070   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1071       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1072     {
1073       lr_error (ldfile, _("\
1074 %s: the start and the end symbol of a range must stand for characters"),
1075                 "LC_COLLATE");
1076       return;
1077     }
1078 #endif
1079
1080   if (ellipsis == tok_ellipsis3)
1081     {
1082       /* One requirement we make here: the length of the byte
1083          sequences for the first and end character must be the same.
1084          This is mainly to prevent unwanted effects and this is often
1085          not what is wanted.  */
1086       size_t len = (startp->mbs != NULL ? startp->nmbs
1087                     : (endp->mbs != NULL ? endp->nmbs : 0));
1088       char mbcnt[len + 1];
1089       char mbend[len + 1];
1090
1091       /* Well, this should be caught somewhere else already.  Just to
1092          make sure.  */
1093       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1094       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1095
1096       if (startp != NULL && endp != NULL
1097           && startp->mbs != NULL && endp->mbs != NULL
1098           && startp->nmbs != endp->nmbs)
1099         {
1100           lr_error (ldfile, _("\
1101 %s: byte sequences of first and last character must have the same length"),
1102                     "LC_COLLATE");
1103           return;
1104         }
1105
1106       /* Determine whether we have to generate multibyte sequences.  */
1107       if ((startp == NULL || startp->mbs != NULL)
1108           && (endp == NULL || endp->mbs != NULL))
1109         {
1110           int cnt;
1111           int ret;
1112
1113           /* Prepare the beginning byte sequence.  This is either from the
1114              beginning byte sequence or it is all nulls if it was an
1115              initial ellipsis.  */
1116           if (startp == NULL || startp->mbs == NULL)
1117             memset (mbcnt, '\0', len);
1118           else
1119             {
1120               memcpy (mbcnt, startp->mbs, len);
1121
1122               /* And increment it so that the value is the first one we will
1123                  try to insert.  */
1124               for (cnt = len - 1; cnt >= 0; --cnt)
1125                 if (++mbcnt[cnt] != '\0')
1126                   break;
1127             }
1128           mbcnt[len] = '\0';
1129
1130           /* And the end sequence.  */
1131           if (endp == NULL || endp->mbs == NULL)
1132             memset (mbend, '\0', len);
1133           else
1134             memcpy (mbend, endp->mbs, len);
1135           mbend[len] = '\0';
1136
1137           /* Test whether we have a correct range.  */
1138           ret = memcmp (mbcnt, mbend, len);
1139           if (ret >= 0)
1140             {
1141               if (ret > 0)
1142                 lr_error (ldfile, _("%s: byte sequence of first character of \
1143 sequence is not lower than that of the last character"), "LC_COLLATE");
1144               return;
1145             }
1146
1147           /* Generate the byte sequences data.  */
1148           while (1)
1149             {
1150               struct charseq *seq;
1151
1152               /* Quite a bit of work ahead.  We have to find the character
1153                  definition for the byte sequence and then determine the
1154                  wide character belonging to it.  */
1155               seq = charmap_find_symbol (charmap, mbcnt, len);
1156               if (seq != NULL)
1157                 {
1158                   struct element_t *elem;
1159                   size_t namelen;
1160
1161                   /* I don't this this can ever happen.  */
1162                   assert (seq->name != NULL);
1163                   namelen = strlen (seq->name);
1164
1165                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1166                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1167                                                        namelen);
1168
1169                   /* Now we are ready to insert the new value in the
1170                      sequence.  Find out whether the element is
1171                      already known.  */
1172                   if (find_entry (&collate->seq_table, seq->name, namelen,
1173                                   (void **) &elem) != 0)
1174                     {
1175                       uint32_t wcs[2] = { seq->ucs4, 0 };
1176
1177                       /* We have to allocate an entry.  */
1178                       elem = new_element (collate, mbcnt, len,
1179                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1180                                           ? NULL : wcs, seq->name,
1181                                           namelen, 1);
1182
1183                       /* And add it to the table.  */
1184                       if (insert_entry (&collate->seq_table, seq->name,
1185                                         namelen, elem) != 0)
1186                         /* This cannot happen.  */
1187                         assert (! "Internal error");
1188                     }
1189
1190                   /* Test whether this element is not already in the list.  */
1191                   if (elem->next != NULL || (collate->cursor != NULL
1192                                              && elem->next == collate->cursor))
1193                     {
1194                       lr_error (ldfile, _("\
1195 order for `%.*s' already defined at %s:%Zu"),
1196                                 (int) namelen, seq->name,
1197                                 elem->file, elem->line);
1198                       goto increment;
1199                     }
1200
1201                   /* Enqueue the new element.  */
1202                   elem->last = collate->cursor;
1203                   if (collate->cursor == NULL)
1204                     elem->next = NULL;
1205                   else
1206                     {
1207                       elem->next = collate->cursor->next;
1208                       elem->last->next = elem;
1209                       if (elem->next != NULL)
1210                         elem->next->last = elem;
1211                     }
1212                   if (collate->start == NULL)
1213                     {
1214                       assert (collate->cursor == NULL);
1215                       collate->start = elem;
1216                     }
1217                   collate->cursor = elem;
1218
1219                  /* Add the weight value.  We take them from the
1220                     `ellipsis_weights' member of `collate'.  */
1221                   elem->weights = (struct element_list_t *)
1222                     obstack_alloc (&collate->mempool,
1223                                    nrules * sizeof (struct element_list_t));
1224                   for (cnt = 0; cnt < nrules; ++cnt)
1225                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1226                         && (collate->ellipsis_weight.weights[cnt].w[0]
1227                             == ELEMENT_ELLIPSIS2))
1228                       {
1229                         elem->weights[cnt].w = (struct element_t **)
1230                           obstack_alloc (&collate->mempool,
1231                                          sizeof (struct element_t *));
1232                         elem->weights[cnt].w[0] = elem;
1233                         elem->weights[cnt].cnt = 1;
1234                       }
1235                     else
1236                       {
1237                         /* Simply use the weight from `ellipsis_weight'.  */
1238                         elem->weights[cnt].w =
1239                           collate->ellipsis_weight.weights[cnt].w;
1240                         elem->weights[cnt].cnt =
1241                           collate->ellipsis_weight.weights[cnt].cnt;
1242                       }
1243                 }
1244
1245               /* Increment for the next round.  */
1246             increment:
1247               for (cnt = len - 1; cnt >= 0; --cnt)
1248                 if (++mbcnt[cnt] != '\0')
1249                   break;
1250
1251               /* Find out whether this was all.  */
1252               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1253                 /* Yep, that's all.  */
1254                 break;
1255             }
1256         }
1257     }
1258   else
1259     {
1260       /* For symbolic range we naturally must have a beginning and an
1261          end specified by the user.  */
1262       if (startp == NULL)
1263         lr_error (ldfile, _("\
1264 %s: symbolic range ellipsis must not directly follow `order_start'"),
1265                   "LC_COLLATE");
1266       else if (endp == NULL)
1267         lr_error (ldfile, _("\
1268 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1269                   "LC_COLLATE");
1270       else
1271         {
1272           /* Determine the range.  To do so we have to determine the
1273              common prefix of the both names and then the numeric
1274              values of both ends.  */
1275           size_t lenfrom = strlen (startp->name);
1276           size_t lento = strlen (endp->name);
1277           char buf[lento + 1];
1278           int preflen = 0;
1279           long int from;
1280           long int to;
1281           char *cp;
1282           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1283
1284           if (lenfrom != lento)
1285             {
1286             invalid_range:
1287               lr_error (ldfile, _("\
1288 `%s' and `%.*s' are no valid names for symbolic range"),
1289                         startp->name, (int) lento, endp->name);
1290               return;
1291             }
1292
1293           while (startp->name[preflen] == endp->name[preflen])
1294             if (startp->name[preflen] == '\0')
1295               /* Nothing to be done.  The start and end point are identical
1296                  and while inserting the end point we have already given
1297                  the user an error message.  */
1298               return;
1299             else
1300               ++preflen;
1301
1302           errno = 0;
1303           from = strtol (startp->name + preflen, &cp, base);
1304           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1305             goto invalid_range;
1306
1307           errno = 0;
1308           to = strtol (endp->name + preflen, &cp, base);
1309           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1310             goto invalid_range;
1311
1312           /* Copy the prefix.  */
1313           memcpy (buf, startp->name, preflen);
1314
1315           /* Loop over all values.  */
1316           for (++from; from < to; ++from)
1317             {
1318               struct element_t *elem = NULL;
1319               struct charseq *seq;
1320               uint32_t wc;
1321               int cnt;
1322
1323               /* Generate the the name.  */
1324               sprintf (buf + preflen, base == 10 ? "%ld" : "%lX", from);
1325
1326               /* Look whether this name is already defined.  */
1327               if (find_entry (&collate->seq_table, buf, symlen,
1328                               (void **) &elem) == 0)
1329                 {
1330                   if (elem->next != NULL || (collate->cursor != NULL
1331                                              && elem->next == collate->cursor))
1332                     {
1333                       lr_error (ldfile, _("\
1334 %s: order for `%.*s' already defined at %s:%Zu"),
1335                                 "LC_COLLATE", (int) lenfrom, buf,
1336                                 elem->file, elem->line);
1337                       continue;
1338                     }
1339
1340                   if (elem->name == NULL)
1341                     {
1342                       lr_error (ldfile, _("%s: `%s' must be a character"),
1343                                 "LC_COLLATE", buf);
1344                       continue;
1345                     }
1346                 }
1347
1348               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1349                 {
1350                   /* Search for a character of this name.  */
1351                   seq = charmap_find_value (charmap, buf, lenfrom);
1352                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1353                     {
1354                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1355
1356                       if (seq != NULL)
1357                         seq->ucs4 = wc;
1358                     }
1359                   else
1360                     wc = seq->ucs4;
1361
1362                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1363                     /* We don't know anything about a character with this
1364                        name.  XXX Should we warn?  */
1365                     continue;
1366
1367                   if (elem == NULL)
1368                     {
1369                       uint32_t wcs[2] = { wc, 0 };
1370
1371                       /* We have to allocate an entry.  */
1372                       elem = new_element (collate,
1373                                           seq != NULL ? seq->bytes : NULL,
1374                                           seq != NULL ? seq->nbytes : 0,
1375                                           wc == ILLEGAL_CHAR_VALUE
1376                                           ? NULL : wcs, buf, lenfrom, 1);
1377                     }
1378                   else
1379                     {
1380                       /* Update the element.  */
1381                       if (seq != NULL)
1382                         {
1383                           elem->mbs = obstack_copy0 (&collate->mempool,
1384                                                      seq->bytes, seq->nbytes);
1385                           elem->nmbs = seq->nbytes;
1386                         }
1387
1388                       if (wc != ILLEGAL_CHAR_VALUE)
1389                         {
1390                           uint32_t zero = 0;
1391
1392                           obstack_grow (&collate->mempool,
1393                                         &wc, sizeof (uint32_t));
1394                           obstack_grow (&collate->mempool,
1395                                         &zero, sizeof (uint32_t));
1396                           elem->wcs = obstack_finish (&collate->mempool);
1397                           elem->nwcs = 1;
1398                         }
1399                     }
1400
1401                   elem->file = ldfile->fname;
1402                   elem->line = ldfile->lineno;
1403                   elem->section = collate->current_section;
1404                 }
1405
1406               /* Enqueue the new element.  */
1407               elem->last = collate->cursor;
1408               elem->next = collate->cursor->next;
1409               elem->last->next = elem;
1410               if (elem->next != NULL)
1411                 elem->next->last = elem;
1412               collate->cursor = elem;
1413
1414               /* Now add the weights.  They come from the `ellipsis_weights'
1415                  member of `collate'.  */
1416               elem->weights = (struct element_list_t *)
1417                 obstack_alloc (&collate->mempool,
1418                                nrules * sizeof (struct element_list_t));
1419               for (cnt = 0; cnt < nrules; ++cnt)
1420                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1421                     && (collate->ellipsis_weight.weights[cnt].w[0]
1422                         == ELEMENT_ELLIPSIS2))
1423                   {
1424                     elem->weights[cnt].w = (struct element_t **)
1425                       obstack_alloc (&collate->mempool,
1426                                      sizeof (struct element_t *));
1427                     elem->weights[cnt].w[0] = elem;
1428                     elem->weights[cnt].cnt = 1;
1429                   }
1430                 else
1431                   {
1432                     /* Simly use the weight from `ellipsis_weight'.  */
1433                     elem->weights[cnt].w =
1434                       collate->ellipsis_weight.weights[cnt].w;
1435                     elem->weights[cnt].cnt =
1436                       collate->ellipsis_weight.weights[cnt].cnt;
1437                   }
1438             }
1439         }
1440     }
1441 }
1442
1443
1444 static void
1445 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1446                  struct localedef_t *copy_locale, int ignore_content)
1447 {
1448   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1449     {
1450       struct locale_collate_t *collate;
1451
1452       if (copy_locale == NULL)
1453         {
1454           collate = locale->categories[LC_COLLATE].collate =
1455             (struct locale_collate_t *)
1456             xcalloc (1, sizeof (struct locale_collate_t));
1457
1458           /* Init the various data structures.  */
1459           init_hash (&collate->elem_table, 100);
1460           init_hash (&collate->sym_table, 100);
1461           init_hash (&collate->seq_table, 500);
1462           obstack_init (&collate->mempool);
1463
1464           collate->col_weight_max = -1;
1465         }
1466       else
1467         /* Reuse the copy_locale's data structures.  */
1468         collate = locale->categories[LC_COLLATE].collate =
1469           copy_locale->categories[LC_COLLATE].collate;
1470     }
1471
1472   ldfile->translate_strings = 0;
1473   ldfile->return_widestr = 0;
1474 }
1475
1476
1477 void
1478 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1479 {
1480   /* Now is the time when we can assign the individual collation
1481      values for all the symbols.  We have possibly different values
1482      for the wide- and the multibyte-character symbols.  This is done
1483      since it might make a difference in the encoding if there is in
1484      some cases no multibyte-character but there are wide-characters.
1485      (The other way around it is not important since theencoded
1486      collation value in the wide-character case is 32 bits wide and
1487      therefore requires no encoding).
1488
1489      The lowest collation value assigned is 2.  Zero is reserved for
1490      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1491      functions and 1 is used to separate the individual passes for the
1492      different rules.
1493
1494      We also have to construct is list with all the bytes/words which
1495      can come first in a sequence, followed by all the elements which
1496      also start with this byte/word.  The order is reverse which has
1497      among others the important effect that longer strings are located
1498      first in the list.  This is required for the output data since
1499      the algorithm used in `strcoll' etc depends on this.
1500
1501      The multibyte case is easy.  We simply sort into an array with
1502      256 elements.  */
1503   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1504   int mbact[nrules];
1505   int wcact;
1506   int mbseqact;
1507   int wcseqact;
1508   struct element_t *runp;
1509   int i;
1510   int need_undefined = 0;
1511   struct section_list *sect;
1512   int ruleidx;
1513   int nr_wide_elems = 0;
1514
1515   if (collate == NULL)
1516     {
1517       /* No data, no check.  */
1518       if (! be_quiet)
1519         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1520                                 "LC_COLLATE"));
1521       return;
1522     }
1523
1524   /* If this assertion is hit change the type in `element_t'.  */
1525   assert (nrules <= sizeof (runp->used_in_level) * 8);
1526
1527   /* Make sure that the `position' rule is used either in all sections
1528      or in none.  */
1529   for (i = 0; i < nrules; ++i)
1530     for (sect = collate->sections; sect != NULL; sect = sect->next)
1531       if (sect->rules != NULL
1532           && ((sect->rules[i] & sort_position)
1533               != (collate->sections->rules[i] & sort_position)))
1534         {
1535           WITH_CUR_LOCALE (error (0, 0, _("\
1536 %s: `position' must be used for a specific level in all sections or none"),
1537                                   "LC_COLLATE"));
1538           break;
1539         }
1540
1541   /* Find out which elements are used at which level.  At the same
1542      time we find out whether we have any undefined symbols.  */
1543   runp = collate->start;
1544   while (runp != NULL)
1545     {
1546       if (runp->mbs != NULL)
1547         {
1548           for (i = 0; i < nrules; ++i)
1549             {
1550               int j;
1551
1552               for (j = 0; j < runp->weights[i].cnt; ++j)
1553                 /* A NULL pointer as the weight means IGNORE.  */
1554                 if (runp->weights[i].w[j] != NULL)
1555                   {
1556                     if (runp->weights[i].w[j]->weights == NULL)
1557                       {
1558                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1559                                                         runp->line,
1560                                                         _("symbol `%s' not defined"),
1561                                                         runp->weights[i].w[j]->name));
1562
1563                         need_undefined = 1;
1564                         runp->weights[i].w[j] = &collate->undefined;
1565                       }
1566                     else
1567                       /* Set the bit for the level.  */
1568                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1569                   }
1570             }
1571         }
1572
1573       /* Up to the next entry.  */
1574       runp = runp->next;
1575     }
1576
1577   /* Walk through the list of defined sequences and assign weights.  Also
1578      create the data structure which will allow generating the single byte
1579      character based tables.
1580
1581      Since at each time only the weights for each of the rules are
1582      only compared to other weights for this rule it is possible to
1583      assign more compact weight values than simply counting all
1584      weights in sequence.  We can assign weights from 3, one for each
1585      rule individually and only for those elements, which are actually
1586      used for this rule.
1587
1588      Why is this important?  It is not for the wide char table.  But
1589      it is for the singlebyte output since here larger numbers have to
1590      be encoded to make it possible to emit the value as a byte
1591      string.  */
1592   for (i = 0; i < nrules; ++i)
1593     mbact[i] = 2;
1594   wcact = 2;
1595   mbseqact = 0;
1596   wcseqact = 0;
1597   runp = collate->start;
1598   while (runp != NULL)
1599     {
1600       /* Determine the order.  */
1601       if (runp->used_in_level != 0)
1602         {
1603           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1604                                                  nrules * sizeof (int));
1605
1606           for (i = 0; i < nrules; ++i)
1607             if ((runp->used_in_level & (1 << i)) != 0)
1608               runp->mborder[i] = mbact[i]++;
1609             else
1610               runp->mborder[i] = 0;
1611         }
1612
1613       if (runp->mbs != NULL)
1614         {
1615           struct element_t **eptr;
1616           struct element_t *lastp = NULL;
1617
1618           /* Find the point where to insert in the list.  */
1619           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1620           while (*eptr != NULL)
1621             {
1622               if ((*eptr)->nmbs < runp->nmbs)
1623                 break;
1624
1625               if ((*eptr)->nmbs == runp->nmbs)
1626                 {
1627                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1628
1629                   if (c == 0)
1630                     {
1631                       /* This should not happen.  It means that we have
1632                          to symbols with the same byte sequence.  It is
1633                          of course an error.  */
1634                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1635                                                       (*eptr)->line,
1636                                                       _("\
1637 symbol `%s' has the same encoding as"), (*eptr)->name);
1638                                        error_at_line (0, 0, runp->file,
1639                                                       runp->line,
1640                                                       _("symbol `%s'"),
1641                                                       runp->name));
1642                       goto dont_insert;
1643                     }
1644                   else if (c < 0)
1645                     /* Insert it here.  */
1646                     break;
1647                 }
1648
1649               /* To the next entry.  */
1650               lastp = *eptr;
1651               eptr = &(*eptr)->mbnext;
1652             }
1653
1654           /* Set the pointers.  */
1655           runp->mbnext = *eptr;
1656           runp->mblast = lastp;
1657           if (*eptr != NULL)
1658             (*eptr)->mblast = runp;
1659           *eptr = runp;
1660         dont_insert:
1661           ;
1662         }
1663
1664       if (runp->used_in_level)
1665         {
1666           runp->wcorder = wcact++;
1667
1668           /* We take the opportunity to count the elements which have
1669              wide characters.  */
1670           ++nr_wide_elems;
1671         }
1672
1673       if (runp->is_character)
1674         {
1675           if (runp->nmbs == 1)
1676             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1677
1678           runp->wcseqorder = wcseqact++;
1679         }
1680       else if (runp->mbs != NULL && runp->weights != NULL)
1681         /* This is for collation elements.  */
1682         runp->wcseqorder = wcseqact++;
1683
1684       /* Up to the next entry.  */
1685       runp = runp->next;
1686     }
1687
1688   /* Find out whether any of the `mbheads' entries is unset.  In this
1689      case we use the UNDEFINED entry.  */
1690   for (i = 1; i < 256; ++i)
1691     if (collate->mbheads[i] == NULL)
1692       {
1693         need_undefined = 1;
1694         collate->mbheads[i] = &collate->undefined;
1695       }
1696
1697   /* Now to the wide character case.  */
1698   collate->wcheads.p = 6;
1699   collate->wcheads.q = 10;
1700   wchead_table_init (&collate->wcheads);
1701
1702   collate->wcseqorder.p = 6;
1703   collate->wcseqorder.q = 10;
1704   collseq_table_init (&collate->wcseqorder);
1705
1706   /* Start adding.  */
1707   runp = collate->start;
1708   while (runp != NULL)
1709     {
1710       if (runp->wcs != NULL)
1711         {
1712           struct element_t *e;
1713           struct element_t **eptr;
1714           struct element_t *lastp;
1715
1716           /* Insert the collation sequence value.  */
1717           if (runp->is_character)
1718             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1719                                runp->wcseqorder);
1720
1721           /* Find the point where to insert in the list.  */
1722           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1723           eptr = &e;
1724           lastp = NULL;
1725           while (*eptr != NULL)
1726             {
1727               if ((*eptr)->nwcs < runp->nwcs)
1728                 break;
1729
1730               if ((*eptr)->nwcs == runp->nwcs)
1731                 {
1732                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1733                                    (wchar_t *) runp->wcs, runp->nwcs);
1734
1735                   if (c == 0)
1736                     {
1737                       /* This should not happen.  It means that we have
1738                          two symbols with the same byte sequence.  It is
1739                          of course an error.  */
1740                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1741                                                       (*eptr)->line,
1742                                                       _("\
1743 symbol `%s' has the same encoding as"), (*eptr)->name);
1744                                        error_at_line (0, 0, runp->file,
1745                                                       runp->line,
1746                                                       _("symbol `%s'"),
1747                                                       runp->name));
1748                       goto dont_insertwc;
1749                     }
1750                   else if (c < 0)
1751                     /* Insert it here.  */
1752                     break;
1753                 }
1754
1755               /* To the next entry.  */
1756               lastp = *eptr;
1757               eptr = &(*eptr)->wcnext;
1758             }
1759
1760           /* Set the pointers.  */
1761           runp->wcnext = *eptr;
1762           runp->wclast = lastp;
1763           if (*eptr != NULL)
1764             (*eptr)->wclast = runp;
1765           *eptr = runp;
1766           if (eptr == &e)
1767             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1768         dont_insertwc:
1769           ;
1770         }
1771
1772       /* Up to the next entry.  */
1773       runp = runp->next;
1774     }
1775
1776   collseq_table_finalize (&collate->wcseqorder);
1777
1778   /* Now determine whether the UNDEFINED entry is needed and if yes,
1779      whether it was defined.  */
1780   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1781   if (collate->undefined.file == NULL)
1782     {
1783       if (need_undefined)
1784         {
1785           /* This seems not to be enforced by recent standards.  Don't
1786              emit an error, simply append UNDEFINED at the end.  */
1787           if (0)
1788             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1789
1790           /* Add UNDEFINED at the end.  */
1791           collate->undefined.mborder =
1792             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1793
1794           for (i = 0; i < nrules; ++i)
1795             collate->undefined.mborder[i] = mbact[i]++;
1796         }
1797
1798       /* In any case we will need the definition for the wide character
1799          case.  But we will not complain that it is missing since the
1800          specification strangely enough does not seem to account for
1801          this.  */
1802       collate->undefined.wcorder = wcact++;
1803     }
1804
1805   /* Finally, try to unify the rules for the sections.  Whenever the rules
1806      for a section are the same as those for another section give the
1807      ruleset the same index.  Since there are never many section we can
1808      use an O(n^2) algorithm here.  */
1809   sect = collate->sections;
1810   while (sect != NULL && sect->rules == NULL)
1811     sect = sect->next;
1812
1813   /* Bail out if we have no sections because of earlier errors.  */
1814   if (sect == NULL)
1815     {
1816       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1817                               _("too many errors; giving up")));
1818       return;
1819     }
1820
1821   ruleidx = 0;
1822   do
1823     {
1824       struct section_list *osect = collate->sections;
1825
1826       while (osect != sect)
1827         if (osect->rules != NULL
1828             && memcmp (osect->rules, sect->rules, nrules) == 0)
1829           break;
1830         else
1831           osect = osect->next;
1832
1833       if (osect == sect)
1834         sect->ruleidx = ruleidx++;
1835       else
1836         sect->ruleidx = osect->ruleidx;
1837
1838       /* Next section.  */
1839       do
1840         sect = sect->next;
1841       while (sect != NULL && sect->rules == NULL);
1842     }
1843   while (sect != NULL);
1844   /* We are currently not prepared for more than 128 rulesets.  But this
1845      should never really be a problem.  */
1846   assert (ruleidx <= 128);
1847 }
1848
1849
1850 static int32_t
1851 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1852                struct element_t *elem)
1853 {
1854   size_t cnt;
1855   int32_t retval;
1856
1857   /* Optimize the use of UNDEFINED.  */
1858   if (elem == &collate->undefined)
1859     /* The weights are already inserted.  */
1860     return 0;
1861
1862   /* This byte can start exactly one collation element and this is
1863      a single byte.  We can directly give the index to the weights.  */
1864   retval = obstack_object_size (pool);
1865
1866   /* Construct the weight.  */
1867   for (cnt = 0; cnt < nrules; ++cnt)
1868     {
1869       char buf[elem->weights[cnt].cnt * 7];
1870       int len = 0;
1871       int i;
1872
1873       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1874         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1875         if (elem->weights[cnt].w[i] != NULL)
1876           len += utf8_encode (&buf[len],
1877                               elem->weights[cnt].w[i]->mborder[cnt]);
1878
1879       /* And add the buffer content.  */
1880       obstack_1grow (pool, len);
1881       obstack_grow (pool, buf, len);
1882     }
1883
1884   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1885 }
1886
1887
1888 static int32_t
1889 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1890                  struct element_t *elem)
1891 {
1892   size_t cnt;
1893   int32_t retval;
1894
1895   /* Optimize the use of UNDEFINED.  */
1896   if (elem == &collate->undefined)
1897     /* The weights are already inserted.  */
1898     return 0;
1899
1900   /* This byte can start exactly one collation element and this is
1901      a single byte.  We can directly give the index to the weights.  */
1902   retval = obstack_object_size (pool) / sizeof (int32_t);
1903
1904   /* Construct the weight.  */
1905   for (cnt = 0; cnt < nrules; ++cnt)
1906     {
1907       int32_t buf[elem->weights[cnt].cnt];
1908       int i;
1909       int32_t j;
1910
1911       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1912         if (elem->weights[cnt].w[i] != NULL)
1913           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1914
1915       /* And add the buffer content.  */
1916       obstack_int32_grow (pool, j);
1917
1918       obstack_grow (pool, buf, j * sizeof (int32_t));
1919     }
1920
1921   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1922 }
1923
1924
1925 void
1926 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
1927                 const char *output_path)
1928 {
1929   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1930   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1931   struct iovec iov[2 + nelems];
1932   struct locale_file data;
1933   uint32_t idx[nelems];
1934   size_t cnt;
1935   size_t ch;
1936   int32_t tablemb[256];
1937   struct obstack weightpool;
1938   struct obstack extrapool;
1939   struct obstack indirectpool;
1940   struct section_list *sect;
1941   struct collidx_table tablewc;
1942   uint32_t elem_size;
1943   uint32_t *elem_table;
1944   int i;
1945   struct element_t *runp;
1946
1947   data.magic = LIMAGIC (LC_COLLATE);
1948   data.n = nelems;
1949   iov[0].iov_base = (void *) &data;
1950   iov[0].iov_len = sizeof (data);
1951
1952   iov[1].iov_base = (void *) idx;
1953   iov[1].iov_len = sizeof (idx);
1954
1955   idx[0] = iov[0].iov_len + iov[1].iov_len;
1956   cnt = 0;
1957
1958   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1959   iov[2 + cnt].iov_base = &nrules;
1960   iov[2 + cnt].iov_len = sizeof (uint32_t);
1961   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1962   ++cnt;
1963
1964   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1965   if (collate == NULL)
1966     {
1967       int32_t dummy = 0;
1968
1969       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1970         {
1971           /* The words have to be handled specially.  */
1972           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1973             {
1974               iov[2 + cnt].iov_base = &dummy;
1975               iov[2 + cnt].iov_len = sizeof (int32_t);
1976             }
1977           else
1978             {
1979               iov[2 + cnt].iov_base = NULL;
1980               iov[2 + cnt].iov_len = 0;
1981             }
1982
1983           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1984             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1985           ++cnt;
1986         }
1987
1988       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1989
1990       write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1991
1992       return;
1993     }
1994
1995   obstack_init (&weightpool);
1996   obstack_init (&extrapool);
1997   obstack_init (&indirectpool);
1998
1999   /* Since we are using the sign of an integer to mark indirection the
2000      offsets in the arrays we are indirectly referring to must not be
2001      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2002   obstack_int32_grow (&extrapool, 0);
2003   obstack_int32_grow (&indirectpool, 0);
2004
2005   /* Prepare the ruleset table.  */
2006   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2007     if (sect->rules != NULL && sect->ruleidx == i)
2008       {
2009         int j;
2010
2011         obstack_make_room (&weightpool, nrules);
2012
2013         for (j = 0; j < nrules; ++j)
2014           obstack_1grow_fast (&weightpool, sect->rules[j]);
2015         ++i;
2016       }
2017   /* And align the output.  */
2018   i = (nrules * i) % __alignof__ (int32_t);
2019   if (i > 0)
2020     do
2021       obstack_1grow (&weightpool, '\0');
2022     while (++i < __alignof__ (int32_t));
2023
2024   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2025   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2026   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2027   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2028   ++cnt;
2029
2030   /* Generate the 8-bit table.  Walk through the lists of sequences
2031      starting with the same byte and add them one after the other to
2032      the table.  In case we have more than one sequence starting with
2033      the same byte we have to use extra indirection.
2034
2035      First add a record for the NUL byte.  This entry will never be used
2036      so it does not matter.  */
2037   tablemb[0] = 0;
2038
2039   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2040      will probably be used more than once it is good to store the
2041      weights only once.  */
2042   if (collate->undefined.used_in_level != 0)
2043     output_weight (&weightpool, collate, &collate->undefined);
2044
2045   for (ch = 1; ch < 256; ++ch)
2046     if (collate->mbheads[ch]->mbnext == NULL
2047         && collate->mbheads[ch]->nmbs <= 1)
2048       {
2049         tablemb[ch] = output_weight (&weightpool, collate,
2050                                      collate->mbheads[ch]);
2051       }
2052     else
2053       {
2054         /* The entries in the list are sorted by length and then
2055            alphabetically.  This is the order in which we will add the
2056            elements to the collation table.  This allows simply walking
2057            the table in sequence and stopping at the first matching
2058            entry.  Since the longer sequences are coming first in the
2059            list they have the possibility to match first, just as it
2060            has to be.  In the worst case we are walking to the end of
2061            the list where we put, if no singlebyte sequence is defined
2062            in the locale definition, the weights for UNDEFINED.
2063
2064            To reduce the length of the search list we compress them a bit.
2065            This happens by collecting sequences of consecutive byte
2066            sequences in one entry (having and begin and end byte sequence)
2067            and add only one index into the weight table.  We can find the
2068            consecutive entries since they are also consecutive in the list.  */
2069         struct element_t *runp = collate->mbheads[ch];
2070         struct element_t *lastp;
2071
2072         assert ((obstack_object_size (&extrapool)
2073                  & (__alignof__ (int32_t) - 1)) == 0);
2074
2075         tablemb[ch] = -obstack_object_size (&extrapool);
2076
2077         do
2078           {
2079             /* Store the current index in the weight table.  We know that
2080                the current position in the `extrapool' is aligned on a
2081                32-bit address.  */
2082             int32_t weightidx;
2083             int added;
2084
2085             /* Find out wether this is a single entry or we have more than
2086                one consecutive entry.  */
2087             if (runp->mbnext != NULL
2088                 && runp->nmbs == runp->mbnext->nmbs
2089                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2090                 && (runp->mbs[runp->nmbs - 1]
2091                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2092               {
2093                 int i;
2094                 struct element_t *series_startp = runp;
2095                 struct element_t *curp;
2096
2097                 /* Compute how much space we will need.  */
2098                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2099                           + __alignof__ (int32_t) - 1)
2100                          & ~(__alignof__ (int32_t) - 1));
2101                 assert ((obstack_object_size (&extrapool)
2102                          & (__alignof__ (int32_t) - 1)) == 0);
2103                 obstack_make_room (&extrapool, added);
2104
2105                 /* More than one consecutive entry.  We mark this by having
2106                    a negative index into the indirect table.  */
2107                 obstack_int32_grow_fast (&extrapool,
2108                                          -(obstack_object_size (&indirectpool)
2109                                            / sizeof (int32_t)));
2110
2111                 /* Now search first the end of the series.  */
2112                 do
2113                   runp = runp->mbnext;
2114                 while (runp->mbnext != NULL
2115                        && runp->nmbs == runp->mbnext->nmbs
2116                        && memcmp (runp->mbs, runp->mbnext->mbs,
2117                                   runp->nmbs - 1) == 0
2118                        && (runp->mbs[runp->nmbs - 1]
2119                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2120
2121                 /* Now walk backward from here to the beginning.  */
2122                 curp = runp;
2123
2124                 assert (runp->nmbs <= 256);
2125                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2126                 for (i = 1; i < curp->nmbs; ++i)
2127                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2128
2129                 /* Now find the end of the consecutive sequence and
2130                    add all the indeces in the indirect pool.  */
2131                 do
2132                   {
2133                     weightidx = output_weight (&weightpool, collate, curp);
2134                     obstack_int32_grow (&indirectpool, weightidx);
2135
2136                     curp = curp->mblast;
2137                   }
2138                 while (curp != series_startp);
2139
2140                 /* Add the final weight.  */
2141                 weightidx = output_weight (&weightpool, collate, curp);
2142                 obstack_int32_grow (&indirectpool, weightidx);
2143
2144                 /* And add the end byte sequence.  Without length this
2145                    time.  */
2146                 for (i = 1; i < curp->nmbs; ++i)
2147                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2148               }
2149             else
2150               {
2151                 /* A single entry.  Simply add the index and the length and
2152                    string (except for the first character which is already
2153                    tested for).  */
2154                 int i;
2155
2156                 /* Output the weight info.  */
2157                 weightidx = output_weight (&weightpool, collate, runp);
2158
2159                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2160                           + __alignof__ (int32_t) - 1)
2161                          & ~(__alignof__ (int32_t) - 1));
2162                 assert ((obstack_object_size (&extrapool)
2163                          & (__alignof__ (int32_t) - 1)) == 0);
2164                 obstack_make_room (&extrapool, added);
2165
2166                 obstack_int32_grow_fast (&extrapool, weightidx);
2167                 assert (runp->nmbs <= 256);
2168                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2169
2170                 for (i = 1; i < runp->nmbs; ++i)
2171                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2172               }
2173
2174             /* Add alignment bytes if necessary.  */
2175             while ((obstack_object_size (&extrapool)
2176                     & (__alignof__ (int32_t) - 1)) != 0)
2177               obstack_1grow_fast (&extrapool, '\0');
2178
2179             /* Next entry.  */
2180             lastp = runp;
2181             runp = runp->mbnext;
2182           }
2183         while (runp != NULL);
2184
2185         assert ((obstack_object_size (&extrapool)
2186                  & (__alignof__ (int32_t) - 1)) == 0);
2187
2188         /* If the final entry in the list is not a single character we
2189            add an UNDEFINED entry here.  */
2190         if (lastp->nmbs != 1)
2191           {
2192             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2193                          & ~(__alignof__ (int32_t) - 1));
2194             obstack_make_room (&extrapool, added);
2195
2196             obstack_int32_grow_fast (&extrapool, 0);
2197             /* XXX What rule? We just pick the first.  */
2198             obstack_1grow_fast (&extrapool, 0);
2199             /* Length is zero.  */
2200             obstack_1grow_fast (&extrapool, 0);
2201
2202             /* Add alignment bytes if necessary.  */
2203             while ((obstack_object_size (&extrapool)
2204                     & (__alignof__ (int32_t) - 1)) != 0)
2205               obstack_1grow_fast (&extrapool, '\0');
2206           }
2207       }
2208
2209   /* Add padding to the tables if necessary.  */
2210   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2211          != 0)
2212     obstack_1grow (&weightpool, 0);
2213
2214   /* Now add the four tables.  */
2215   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2216   iov[2 + cnt].iov_base = tablemb;
2217   iov[2 + cnt].iov_len = sizeof (tablemb);
2218   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2219   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2220   ++cnt;
2221
2222   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2223   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2224   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2225   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2226   ++cnt;
2227
2228   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2229   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2230   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2231   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2232   ++cnt;
2233
2234   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2235   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2236   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2237   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2238   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2239   ++cnt;
2240
2241
2242   /* Now the same for the wide character table.  We need to store some
2243      more information here.  */
2244   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2245   iov[2 + cnt].iov_base = NULL;
2246   iov[2 + cnt].iov_len = 0;
2247   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2248   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2249   ++cnt;
2250
2251   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2252   iov[2 + cnt].iov_base = NULL;
2253   iov[2 + cnt].iov_len = 0;
2254   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2255   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2256   ++cnt;
2257
2258   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2259   iov[2 + cnt].iov_base = NULL;
2260   iov[2 + cnt].iov_len = 0;
2261   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2262   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2263   ++cnt;
2264
2265   /* Since we are using the sign of an integer to mark indirection the
2266      offsets in the arrays we are indirectly referring to must not be
2267      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2268   obstack_int32_grow (&extrapool, 0);
2269   obstack_int32_grow (&indirectpool, 0);
2270
2271   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2272      will probably be used more than once it is good to store the
2273      weights only once.  */
2274   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2275     abort ();
2276
2277   /* Generate the table.  Walk through the lists of sequences starting
2278      with the same wide character and add them one after the other to
2279      the table.  In case we have more than one sequence starting with
2280      the same byte we have to use extra indirection.  */
2281   {
2282     auto void add_to_tablewc (uint32_t ch, struct element_t *runp);
2283
2284     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2285       {
2286         if (runp->wcnext == NULL && runp->nwcs == 1)
2287           {
2288             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2289             collidx_table_add (&tablewc, ch, weigthidx);
2290           }
2291         else
2292           {
2293             /* As for the singlebyte table, we recognize sequences and
2294                compress them.  */
2295             struct element_t *lastp;
2296
2297             collidx_table_add (&tablewc, ch,
2298                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2299
2300             do
2301               {
2302                 /* Store the current index in the weight table.  We know that
2303                    the current position in the `extrapool' is aligned on a
2304                    32-bit address.  */
2305                 int32_t weightidx;
2306                 int added;
2307
2308                 /* Find out wether this is a single entry or we have more than
2309                    one consecutive entry.  */
2310                 if (runp->wcnext != NULL
2311                     && runp->nwcs == runp->wcnext->nwcs
2312                     && wmemcmp ((wchar_t *) runp->wcs,
2313                                 (wchar_t *)runp->wcnext->wcs,
2314                                 runp->nwcs - 1) == 0
2315                     && (runp->wcs[runp->nwcs - 1]
2316                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2317                   {
2318                     int i;
2319                     struct element_t *series_startp = runp;
2320                     struct element_t *curp;
2321
2322                     /* Now add first the initial byte sequence.  */
2323                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2324                     if (sizeof (int32_t) == sizeof (int))
2325                       obstack_make_room (&extrapool, added);
2326
2327                     /* More than one consecutive entry.  We mark this by having
2328                        a negative index into the indirect table.  */
2329                     obstack_int32_grow_fast (&extrapool,
2330                                              -(obstack_object_size (&indirectpool)
2331                                                / sizeof (int32_t)));
2332                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2333
2334                     do
2335                       runp = runp->wcnext;
2336                     while (runp->wcnext != NULL
2337                            && runp->nwcs == runp->wcnext->nwcs
2338                            && wmemcmp ((wchar_t *) runp->wcs,
2339                                        (wchar_t *)runp->wcnext->wcs,
2340                                        runp->nwcs - 1) == 0
2341                            && (runp->wcs[runp->nwcs - 1]
2342                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2343
2344                     /* Now walk backward from here to the beginning.  */
2345                     curp = runp;
2346
2347                     for (i = 1; i < runp->nwcs; ++i)
2348                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2349
2350                     /* Now find the end of the consecutive sequence and
2351                        add all the indeces in the indirect pool.  */
2352                     do
2353                       {
2354                         weightidx = output_weightwc (&weightpool, collate,
2355                                                      curp);
2356                         obstack_int32_grow (&indirectpool, weightidx);
2357
2358                         curp = curp->wclast;
2359                       }
2360                     while (curp != series_startp);
2361
2362                     /* Add the final weight.  */
2363                     weightidx = output_weightwc (&weightpool, collate, curp);
2364                     obstack_int32_grow (&indirectpool, weightidx);
2365
2366                     /* And add the end byte sequence.  Without length this
2367                        time.  */
2368                     for (i = 1; i < curp->nwcs; ++i)
2369                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2370                   }
2371                 else
2372                   {
2373                     /* A single entry.  Simply add the index and the length and
2374                        string (except for the first character which is already
2375                        tested for).  */
2376                     int i;
2377
2378                     /* Output the weight info.  */
2379                     weightidx = output_weightwc (&weightpool, collate, runp);
2380
2381                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2382                     if (sizeof (int) == sizeof (int32_t))
2383                       obstack_make_room (&extrapool, added);
2384
2385                     obstack_int32_grow_fast (&extrapool, weightidx);
2386                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2387                     for (i = 1; i < runp->nwcs; ++i)
2388                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2389                   }
2390
2391                 /* Next entry.  */
2392                 lastp = runp;
2393                 runp = runp->wcnext;
2394               }
2395             while (runp != NULL);
2396           }
2397       }
2398
2399     tablewc.p = 6;
2400     tablewc.q = 10;
2401     collidx_table_init (&tablewc);
2402
2403     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2404
2405     collidx_table_finalize (&tablewc);
2406   }
2407
2408   /* Now add the four tables.  */
2409   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2410   iov[2 + cnt].iov_base = tablewc.result;
2411   iov[2 + cnt].iov_len = tablewc.result_size;
2412   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2413   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2414   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2415   ++cnt;
2416
2417   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2418   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2419   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2420   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2421   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2422   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2423   ++cnt;
2424
2425   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2426   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2427   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2428   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2429   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2430   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2431   ++cnt;
2432
2433   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2434   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2435   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2436   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2437   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2438   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2439   ++cnt;
2440
2441
2442   /* Finally write the table with collation element names out.  It is
2443      a hash table with a simple function which gets the name of the
2444      character as the input.  One character might have many names.  The
2445      value associated with the name is an index into the weight table
2446      where we are then interested in the first-level weight value.
2447
2448      To determine how large the table should be we are counting the
2449      elements have to put in.  Since we are using internal chaining
2450      using a secondary hash function we have to make the table a bit
2451      larger to avoid extremely long search times.  We can achieve
2452      good results with a 40% larger table than there are entries.  */
2453   elem_size = 0;
2454   runp = collate->start;
2455   while (runp != NULL)
2456     {
2457       if (runp->mbs != NULL && runp->weights != NULL)
2458         /* Yep, the element really counts.  */
2459         ++elem_size;
2460
2461       runp = runp->next;
2462     }
2463   /* Add 40% and find the next prime number.  */
2464   elem_size = MIN (next_prime (elem_size * 1.4), 257);
2465
2466   /* Allocate the table.  Each entry consists of two words: the hash
2467      value and an index in a secondary table which provides the index
2468      into the weight table and the string itself (so that a match can
2469      be determined).  */
2470   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2471                                            elem_size * 2 * sizeof (uint32_t));
2472   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2473
2474   /* Now add the elements.  */
2475   runp = collate->start;
2476   while (runp != NULL)
2477     {
2478       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2479         {
2480           /* Compute the hash value of the name.  */
2481           uint32_t namelen = strlen (runp->name);
2482           uint32_t hash = elem_hash (runp->name, namelen);
2483           size_t idx = hash % elem_size;
2484
2485           if (elem_table[idx * 2] != 0)
2486             {
2487               /* The spot is already taken.  Try iterating using the value
2488                  from the secondary hashing function.  */
2489               size_t iter = hash % (elem_size - 2);
2490
2491               do
2492                 {
2493                   idx += iter;
2494                   if (idx >= elem_size)
2495                     idx -= elem_size;
2496                 }
2497               while (elem_table[idx * 2] != 0);
2498             }
2499           /* This is the spot where we will insert the value.  */
2500           elem_table[idx * 2] = hash;
2501           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2502
2503           /* Then the string itself including length.  */
2504           obstack_1grow (&extrapool, namelen);
2505           obstack_grow (&extrapool, runp->name, namelen);
2506
2507           /* And the multibyte representation.  */
2508           obstack_1grow (&extrapool, runp->nmbs);
2509           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2510
2511           /* And align again to 32 bits.  */
2512           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2513             obstack_grow (&extrapool, "\0\0",
2514                           (sizeof (int32_t)
2515                            - ((1 + namelen + 1 + runp->nmbs)
2516                               % sizeof (int32_t))));
2517
2518           /* Now some 32-bit values: multibyte collation sequence,
2519              wide char string (including length), and wide char
2520              collation sequence.  */
2521           obstack_int32_grow (&extrapool, runp->mbseqorder);
2522
2523           obstack_int32_grow (&extrapool, runp->nwcs);
2524           obstack_grow (&extrapool, runp->wcs,
2525                         runp->nwcs * sizeof (uint32_t));
2526
2527           obstack_int32_grow (&extrapool, runp->wcseqorder);
2528         }
2529
2530       runp = runp->next;
2531     }
2532
2533   /* Prepare to write out this data.  */
2534   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2535   iov[2 + cnt].iov_base = &elem_size;
2536   iov[2 + cnt].iov_len = sizeof (int32_t);
2537   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2538   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2539   ++cnt;
2540
2541   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2542   iov[2 + cnt].iov_base = elem_table;
2543   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2544   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2545   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2546   ++cnt;
2547
2548   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2549   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2550   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2551   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2552   ++cnt;
2553
2554   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2555   iov[2 + cnt].iov_base = collate->mbseqorder;
2556   iov[2 + cnt].iov_len = 256;
2557   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2558   ++cnt;
2559
2560   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2561   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2562   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2563   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2564   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2565   ++cnt;
2566
2567   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2568   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2569   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2570   ++cnt;
2571
2572   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2573
2574   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
2575
2576   obstack_free (&weightpool, NULL);
2577   obstack_free (&extrapool, NULL);
2578   obstack_free (&indirectpool, NULL);
2579 }
2580
2581
2582 void
2583 collate_read (struct linereader *ldfile, struct localedef_t *result,
2584               const struct charmap_t *charmap, const char *repertoire_name,
2585               int ignore_content)
2586 {
2587   struct repertoire_t *repertoire = NULL;
2588   struct locale_collate_t *collate;
2589   struct token *now;
2590   struct token *arg = NULL;
2591   enum token_t nowtok;
2592   enum token_t was_ellipsis = tok_none;
2593   struct localedef_t *copy_locale = NULL;
2594   /* Parsing state:
2595      0 - start
2596      1 - between `order-start' and `order-end'
2597      2 - after `order-end'
2598      3 - after `reorder-after', waiting for `reorder-end'
2599      4 - after `reorder-end'
2600      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2601      6 - after `reorder-sections-end'
2602   */
2603   int state = 0;
2604
2605   /* Get the repertoire we have to use.  */
2606   if (repertoire_name != NULL)
2607     repertoire = repertoire_read (repertoire_name);
2608
2609   /* The rest of the line containing `LC_COLLATE' must be free.  */
2610   lr_ignore_rest (ldfile, 1);
2611
2612   do
2613     {
2614       now = lr_token (ldfile, charmap, result, NULL, verbose);
2615       nowtok = now->tok;
2616     }
2617   while (nowtok == tok_eol);
2618
2619   if (nowtok == tok_copy)
2620     {
2621       state = 2;
2622       now = lr_token (ldfile, charmap, result, NULL, verbose);
2623       if (now->tok != tok_string)
2624         {
2625           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2626
2627         skip_category:
2628           do
2629             now = lr_token (ldfile, charmap, result, NULL, verbose);
2630           while (now->tok != tok_eof && now->tok != tok_end);
2631
2632           if (now->tok != tok_eof
2633               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2634                   now->tok == tok_eof))
2635             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2636           else if (now->tok != tok_lc_collate)
2637             {
2638               lr_error (ldfile, _("\
2639 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2640               lr_ignore_rest (ldfile, 0);
2641             }
2642           else
2643             lr_ignore_rest (ldfile, 1);
2644
2645           return;
2646         }
2647
2648       if (! ignore_content)
2649         {
2650           /* Get the locale definition.  */
2651           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2652                                      repertoire_name, charmap, NULL);
2653           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2654             {
2655               /* Not yet loaded.  So do it now.  */
2656               if (locfile_read (copy_locale, charmap) != 0)
2657                 goto skip_category;
2658             }
2659         }
2660
2661       lr_ignore_rest (ldfile, 1);
2662
2663       now = lr_token (ldfile, charmap, result, NULL, verbose);
2664       nowtok = now->tok;
2665     }
2666
2667   /* Prepare the data structures.  */
2668   collate_startup (ldfile, result, copy_locale, ignore_content);
2669   collate = result->categories[LC_COLLATE].collate;
2670
2671   while (1)
2672     {
2673       char ucs4buf[10];
2674       char *symstr;
2675       size_t symlen;
2676
2677       /* Of course we don't proceed beyond the end of file.  */
2678       if (nowtok == tok_eof)
2679         break;
2680
2681       /* Ignore empty lines.  */
2682       if (nowtok == tok_eol)
2683         {
2684           now = lr_token (ldfile, charmap, result, NULL, verbose);
2685           nowtok = now->tok;
2686           continue;
2687         }
2688
2689       switch (nowtok)
2690         {
2691         case tok_copy:
2692           /* Allow copying other locales.  */
2693           now = lr_token (ldfile, charmap, result, NULL, verbose);
2694           if (now->tok != tok_string)
2695             goto err_label;
2696
2697           if (! ignore_content)
2698             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2699                          charmap, result);
2700
2701           lr_ignore_rest (ldfile, 1);
2702           break;
2703
2704         case tok_coll_weight_max:
2705           /* Ignore the rest of the line if we don't need the input of
2706              this line.  */
2707           if (ignore_content)
2708             {
2709               lr_ignore_rest (ldfile, 0);
2710               break;
2711             }
2712
2713           if (state != 0)
2714             goto err_label;
2715
2716           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2717           if (arg->tok != tok_number)
2718             goto err_label;
2719           if (collate->col_weight_max != -1)
2720             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2721                       "LC_COLLATE", "col_weight_max");
2722           else
2723             collate->col_weight_max = arg->val.num;
2724           lr_ignore_rest (ldfile, 1);
2725           break;
2726
2727         case tok_section_symbol:
2728           /* Ignore the rest of the line if we don't need the input of
2729              this line.  */
2730           if (ignore_content)
2731             {
2732               lr_ignore_rest (ldfile, 0);
2733               break;
2734             }
2735
2736           if (state != 0)
2737             goto err_label;
2738
2739           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2740           if (arg->tok != tok_bsymbol)
2741             goto err_label;
2742           else if (!ignore_content)
2743             {
2744               /* Check whether this section is already known.  */
2745               struct section_list *known = collate->sections;
2746               while (known != NULL)
2747                 {
2748                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2749                     break;
2750                   known = known->next;
2751                 }
2752
2753               if (known != NULL)
2754                 {
2755                   lr_error (ldfile,
2756                             _("%s: duplicate declaration of section `%s'"),
2757                             "LC_COLLATE", arg->val.str.startmb);
2758                   free (arg->val.str.startmb);
2759                 }
2760               else
2761                 collate->sections = make_seclist_elem (collate,
2762                                                        arg->val.str.startmb,
2763                                                        collate->sections);
2764
2765               lr_ignore_rest (ldfile, known == NULL);
2766             }
2767           else
2768             {
2769               free (arg->val.str.startmb);
2770               lr_ignore_rest (ldfile, 0);
2771             }
2772           break;
2773
2774         case tok_collating_element:
2775           /* Ignore the rest of the line if we don't need the input of
2776              this line.  */
2777           if (ignore_content)
2778             {
2779               lr_ignore_rest (ldfile, 0);
2780               break;
2781             }
2782
2783           if (state != 0 && state != 2)
2784             goto err_label;
2785
2786           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787           if (arg->tok != tok_bsymbol)
2788             goto err_label;
2789           else
2790             {
2791               const char *symbol = arg->val.str.startmb;
2792               size_t symbol_len = arg->val.str.lenmb;
2793
2794               /* Next the `from' keyword.  */
2795               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2796               if (arg->tok != tok_from)
2797                 {
2798                   free ((char *) symbol);
2799                   goto err_label;
2800                 }
2801
2802               ldfile->return_widestr = 1;
2803               ldfile->translate_strings = 1;
2804
2805               /* Finally the string with the replacement.  */
2806               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2807
2808               ldfile->return_widestr = 0;
2809               ldfile->translate_strings = 0;
2810
2811               if (arg->tok != tok_string)
2812                 goto err_label;
2813
2814               if (!ignore_content && symbol != NULL)
2815                 {
2816                   /* The name is already defined.  */
2817                   if (check_duplicate (ldfile, collate, charmap,
2818                                        repertoire, symbol, symbol_len))
2819                     goto col_elem_free;
2820
2821                   if (arg->val.str.startmb != NULL)
2822                     insert_entry (&collate->elem_table, symbol, symbol_len,
2823                                   new_element (collate,
2824                                                arg->val.str.startmb,
2825                                                arg->val.str.lenmb - 1,
2826                                                arg->val.str.startwc,
2827                                                symbol, symbol_len, 0));
2828                 }
2829               else
2830                 {
2831                 col_elem_free:
2832                   if (symbol != NULL)
2833                     free ((char *) symbol);
2834                   if (arg->val.str.startmb != NULL)
2835                     free (arg->val.str.startmb);
2836                   if (arg->val.str.startwc != NULL)
2837                     free (arg->val.str.startwc);
2838                 }
2839               lr_ignore_rest (ldfile, 1);
2840             }
2841           break;
2842
2843         case tok_collating_symbol:
2844           /* Ignore the rest of the line if we don't need the input of
2845              this line.  */
2846           if (ignore_content)
2847             {
2848               lr_ignore_rest (ldfile, 0);
2849               break;
2850             }
2851
2852           if (state != 0 && state != 2)
2853             goto err_label;
2854
2855           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2856           if (arg->tok != tok_bsymbol)
2857             goto err_label;
2858           else
2859             {
2860               char *symbol = arg->val.str.startmb;
2861               size_t symbol_len = arg->val.str.lenmb;
2862               char *endsymbol = NULL;
2863               size_t endsymbol_len = 0;
2864               enum token_t ellipsis = tok_none;
2865
2866               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2867               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2868                 {
2869                   ellipsis = arg->tok;
2870
2871                   arg = lr_token (ldfile, charmap, result, repertoire,
2872                                   verbose);
2873                   if (arg->tok != tok_bsymbol)
2874                     {
2875                       free (symbol);
2876                       goto err_label;
2877                     }
2878
2879                   endsymbol = arg->val.str.startmb;
2880                   endsymbol_len = arg->val.str.lenmb;
2881
2882                   lr_ignore_rest (ldfile, 1);
2883                 }
2884               else if (arg->tok != tok_eol)
2885                 {
2886                   free (symbol);
2887                   goto err_label;
2888                 }
2889
2890               if (!ignore_content)
2891                 {
2892                   if (symbol == NULL
2893                       || (ellipsis != tok_none && endsymbol == NULL))
2894                     {
2895                       lr_error (ldfile, _("\
2896 %s: unknown character in collating symbol name"),
2897                                 "LC_COLLATE");
2898                       goto col_sym_free;
2899                     }
2900                   else if (ellipsis == tok_none)
2901                     {
2902                       /* A single symbol, no ellipsis.  */
2903                       if (check_duplicate (ldfile, collate, charmap,
2904                                            repertoire, symbol, symbol_len))
2905                         /* The name is already defined.  */
2906                         goto col_sym_free;
2907
2908                       insert_entry (&collate->sym_table, symbol, symbol_len,
2909                                     new_symbol (collate, symbol, symbol_len));
2910                     }
2911                   else if (symbol_len != endsymbol_len)
2912                     {
2913                     col_sym_inv_range:
2914                       lr_error (ldfile,
2915                                 _("invalid names for character range"));
2916                       goto col_sym_free;
2917                     }
2918                   else
2919                     {
2920                       /* Oh my, we have to handle an ellipsis.  First, as
2921                          usual, determine the common prefix and then
2922                          convert the rest into a range.  */
2923                       size_t prefixlen;
2924                       unsigned long int from;
2925                       unsigned long int to;
2926                       char *endp;
2927
2928                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2929                         if (symbol[prefixlen] != endsymbol[prefixlen])
2930                           break;
2931
2932                       /* Convert the rest into numbers.  */
2933                       symbol[symbol_len] = '\0';
2934                       from = strtoul (&symbol[prefixlen], &endp,
2935                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2936                       if (*endp != '\0')
2937                         goto col_sym_inv_range;
2938
2939                       endsymbol[symbol_len] = '\0';
2940                       to = strtoul (&endsymbol[prefixlen], &endp,
2941                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2942                       if (*endp != '\0')
2943                         goto col_sym_inv_range;
2944
2945                       if (from > to)
2946                         goto col_sym_inv_range;
2947
2948                       /* Now loop over all entries.  */
2949                       while (from <= to)
2950                         {
2951                           char *symbuf;
2952
2953                           symbuf = (char *) obstack_alloc (&collate->mempool,
2954                                                            symbol_len + 1);
2955
2956                           /* Create the name.  */
2957                           sprintf (symbuf,
2958                                    ellipsis == tok_ellipsis2
2959                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2960                                    (int) prefixlen, symbol,
2961                                    (int) (symbol_len - prefixlen), from);
2962
2963                           if (check_duplicate (ldfile, collate, charmap,
2964                                                repertoire, symbuf, symbol_len))
2965                             /* The name is already defined.  */
2966                             goto col_sym_free;
2967
2968                           insert_entry (&collate->sym_table, symbuf,
2969                                         symbol_len,
2970                                         new_symbol (collate, symbuf,
2971                                                     symbol_len));
2972
2973                           /* Increment the counter.  */
2974                           ++from;
2975                         }
2976
2977                       goto col_sym_free;
2978                     }
2979                 }
2980               else
2981                 {
2982                 col_sym_free:
2983                   if (symbol != NULL)
2984                     free (symbol);
2985                   if (endsymbol != NULL)
2986                     free (endsymbol);
2987                 }
2988             }
2989           break;
2990
2991         case tok_symbol_equivalence:
2992           /* Ignore the rest of the line if we don't need the input of
2993              this line.  */
2994           if (ignore_content)
2995             {
2996               lr_ignore_rest (ldfile, 0);
2997               break;
2998             }
2999
3000           if (state != 0)
3001             goto err_label;
3002
3003           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3004           if (arg->tok != tok_bsymbol)
3005             goto err_label;
3006           else
3007             {
3008               const char *newname = arg->val.str.startmb;
3009               size_t newname_len = arg->val.str.lenmb;
3010               const char *symname;
3011               size_t symname_len;
3012               struct symbol_t *symval;
3013
3014               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3015               if (arg->tok != tok_bsymbol)
3016                 {
3017                   if (newname != NULL)
3018                     free ((char *) newname);
3019                   goto err_label;
3020                 }
3021
3022               symname = arg->val.str.startmb;
3023               symname_len = arg->val.str.lenmb;
3024
3025               if (newname == NULL)
3026                 {
3027                   lr_error (ldfile, _("\
3028 %s: unknown character in equivalent definition name"),
3029                             "LC_COLLATE");
3030
3031                 sym_equiv_free:
3032                   if (newname != NULL)
3033                     free ((char *) newname);
3034                   if (symname != NULL)
3035                     free ((char *) symname);
3036                   break;
3037                 }
3038               if (symname == NULL)
3039                 {
3040                   lr_error (ldfile, _("\
3041 %s: unknown character in equivalent definition value"),
3042                             "LC_COLLATE");
3043                   goto sym_equiv_free;
3044                 }
3045
3046               /* See whether the symbol name is already defined.  */
3047               if (find_entry (&collate->sym_table, symname, symname_len,
3048                               (void **) &symval) != 0)
3049                 {
3050                   lr_error (ldfile, _("\
3051 %s: unknown symbol `%s' in equivalent definition"),
3052                             "LC_COLLATE", symname);
3053                   goto col_sym_free;
3054                 }
3055
3056               if (insert_entry (&collate->sym_table,
3057                                 newname, newname_len, symval) < 0)
3058                 {
3059                   lr_error (ldfile, _("\
3060 error while adding equivalent collating symbol"));
3061                   goto sym_equiv_free;
3062                 }
3063
3064               free ((char *) symname);
3065             }
3066           lr_ignore_rest (ldfile, 1);
3067           break;
3068
3069         case tok_script:
3070           /* We get told about the scripts we know.  */
3071           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3072           if (arg->tok != tok_bsymbol)
3073             goto err_label;
3074           else
3075             {
3076               struct section_list *runp = collate->known_sections;
3077               char *name;
3078
3079               while (runp != NULL)
3080                 if (strncmp (runp->name, arg->val.str.startmb,
3081                              arg->val.str.lenmb) == 0
3082                     && runp->name[arg->val.str.lenmb] == '\0')
3083                   break;
3084                 else
3085                   runp = runp->def_next;
3086
3087               if (runp != NULL)
3088                 {
3089                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3090                             runp->name);
3091                   lr_ignore_rest (ldfile, 0);
3092                   break;
3093                 }
3094
3095               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3096               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3097               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3098               name[arg->val.str.lenmb] = '\0';
3099               runp->name = name;
3100
3101               runp->def_next = collate->known_sections;
3102               collate->known_sections = runp;
3103             }
3104           lr_ignore_rest (ldfile, 1);
3105           break;
3106
3107         case tok_order_start:
3108           /* Ignore the rest of the line if we don't need the input of
3109              this line.  */
3110           if (ignore_content)
3111             {
3112               lr_ignore_rest (ldfile, 0);
3113               break;
3114             }
3115
3116           if (state != 0 && state != 1)
3117             goto err_label;
3118           state = 1;
3119
3120           /* The 14652 draft does not specify whether all `order_start' lines
3121              must contain the same number of sort-rules, but 14651 does.  So
3122              we require this here as well.  */
3123           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3124           if (arg->tok == tok_bsymbol)
3125             {
3126               /* This better should be a section name.  */
3127               struct section_list *sp = collate->known_sections;
3128               while (sp != NULL
3129                      && (sp->name == NULL
3130                          || strncmp (sp->name, arg->val.str.startmb,
3131                                      arg->val.str.lenmb) != 0
3132                          || sp->name[arg->val.str.lenmb] != '\0'))
3133                 sp = sp->def_next;
3134
3135               if (sp == NULL)
3136                 {
3137                   lr_error (ldfile, _("\
3138 %s: unknown section name `%s'"),
3139                             "LC_COLLATE", arg->val.str.startmb);
3140                   /* We use the error section.  */
3141                   collate->current_section = &collate->error_section;
3142
3143                   if (collate->error_section.first == NULL)
3144                     {
3145                       /* Insert &collate->error_section at the end of
3146                          the collate->sections list.  */
3147                       if (collate->sections == NULL)
3148                         collate->sections = &collate->error_section;
3149                       else
3150                         {
3151                           sp = collate->sections;
3152                           while (sp->next != NULL)
3153                             sp = sp->next;
3154
3155                           sp->next = &collate->error_section;
3156                         }
3157                       collate->error_section.next = NULL;
3158                     }
3159                 }
3160               else
3161                 {
3162                   /* One should not be allowed to open the same
3163                      section twice.  */
3164                   if (sp->first != NULL)
3165                     lr_error (ldfile, _("\
3166 %s: multiple order definitions for section `%s'"),
3167                               "LC_COLLATE", sp->name);
3168                   else
3169                     {
3170                       /* Insert sp in the collate->sections list,
3171                          right after collate->current_section.  */
3172                       if (collate->current_section == NULL)
3173                         collate->current_section = sp;
3174                       else
3175                         {
3176                           sp->next = collate->current_section->next;
3177                           collate->current_section->next = sp;
3178                         }
3179                     }
3180
3181                   /* Next should come the end of the line or a semicolon.  */
3182                   arg = lr_token (ldfile, charmap, result, repertoire,
3183                                   verbose);
3184                   if (arg->tok == tok_eol)
3185                     {
3186                       uint32_t cnt;
3187
3188                       /* This means we have exactly one rule: `forward'.  */
3189                       if (nrules > 1)
3190                         lr_error (ldfile, _("\
3191 %s: invalid number of sorting rules"),
3192                                   "LC_COLLATE");
3193                       else
3194                         nrules = 1;
3195                       sp->rules = obstack_alloc (&collate->mempool,
3196                                                  (sizeof (enum coll_sort_rule)
3197                                                   * nrules));
3198                       for (cnt = 0; cnt < nrules; ++cnt)
3199                         sp->rules[cnt] = sort_forward;
3200
3201                       /* Next line.  */
3202                       break;
3203                     }
3204
3205                   /* Get the next token.  */
3206                   arg = lr_token (ldfile, charmap, result, repertoire,
3207                                   verbose);
3208                 }
3209             }
3210           else
3211             {
3212               /* There is no section symbol.  Therefore we use the unnamed
3213                  section.  */
3214               collate->current_section = &collate->unnamed_section;
3215
3216               if (collate->unnamed_section.first != NULL)
3217                 lr_error (ldfile, _("\
3218 %s: multiple order definitions for unnamed section"),
3219                           "LC_COLLATE");
3220               else
3221                 {
3222                   /* Insert &collate->unnamed_section at the beginning of
3223                      the collate->sections list.  */
3224                   collate->unnamed_section.next = collate->sections;
3225                   collate->sections = &collate->unnamed_section;
3226                 }
3227             }
3228
3229           /* Now read the direction names.  */
3230           read_directions (ldfile, arg, charmap, repertoire, result);
3231
3232           /* From now we need the strings untranslated.  */
3233           ldfile->translate_strings = 0;
3234           break;
3235
3236         case tok_order_end:
3237           /* Ignore the rest of the line if we don't need the input of
3238              this line.  */
3239           if (ignore_content)
3240             {
3241               lr_ignore_rest (ldfile, 0);
3242               break;
3243             }
3244
3245           if (state != 1)
3246             goto err_label;
3247
3248           /* Handle ellipsis at end of list.  */
3249           if (was_ellipsis != tok_none)
3250             {
3251               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3252                                repertoire, result);
3253               was_ellipsis = tok_none;
3254             }
3255
3256           state = 2;
3257           lr_ignore_rest (ldfile, 1);
3258           break;
3259
3260         case tok_reorder_after:
3261           /* Ignore the rest of the line if we don't need the input of
3262              this line.  */
3263           if (ignore_content)
3264             {
3265               lr_ignore_rest (ldfile, 0);
3266               break;
3267             }
3268
3269           if (state == 1)
3270             {
3271               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3272                         "LC_COLLATE");
3273               state = 2;
3274
3275               /* Handle ellipsis at end of list.  */
3276               if (was_ellipsis != tok_none)
3277                 {
3278                   handle_ellipsis (ldfile, arg->val.str.startmb,
3279                                    arg->val.str.lenmb, was_ellipsis, charmap,
3280                                    repertoire, result);
3281                   was_ellipsis = tok_none;
3282                 }
3283             }
3284           else if (state != 2 && state != 3)
3285             goto err_label;
3286           state = 3;
3287
3288           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3289           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3290             {
3291               /* Find this symbol in the sequence table.  */
3292               char ucsbuf[10];
3293               char *startmb;
3294               size_t lenmb;
3295               struct element_t *insp;
3296               int no_error = 1;
3297
3298               if (arg->tok == tok_bsymbol)
3299                 {
3300                   startmb = arg->val.str.startmb;
3301                   lenmb = arg->val.str.lenmb;
3302                 }
3303               else
3304                 {
3305                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3306                   startmb = ucsbuf;
3307                   lenmb = 9;
3308                 }
3309
3310               if (find_entry (&collate->seq_table, startmb, lenmb,
3311                               (void **) &insp) == 0)
3312                 /* Yes, the symbol exists.  Simply point the cursor
3313                    to it.  */
3314                 collate->cursor = insp;
3315               else
3316                 {
3317                   struct symbol_t *symbp;
3318
3319                   if (find_entry (&collate->sym_table, startmb, lenmb,
3320                                   (void **) &symbp) == 0)
3321                     {
3322                       if (symbp->order->last != NULL
3323                           || symbp->order->next != NULL)
3324                         collate->cursor = symbp->order;
3325                       else
3326                         {
3327                           /* This is a collating symbol but its position
3328                              is not yet defined.  */
3329                           lr_error (ldfile, _("\
3330 %s: order for collating symbol %.*s not yet defined"),
3331                                     "LC_COLLATE", (int) lenmb, startmb);
3332                           collate->cursor = NULL;
3333                           no_error = 0;
3334                         }
3335                     }
3336                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3337                                        (void **) &insp) == 0)
3338                     {
3339                       if (insp->last != NULL || insp->next != NULL)
3340                         collate->cursor = insp;
3341                       else
3342                         {
3343                           /* This is a collating element but its position
3344                              is not yet defined.  */
3345                           lr_error (ldfile, _("\
3346 %s: order for collating element %.*s not yet defined"),
3347                                     "LC_COLLATE", (int) lenmb, startmb);
3348                           collate->cursor = NULL;
3349                           no_error = 0;
3350                         }
3351                     }
3352                   else
3353                     {
3354                       /* This is bad.  The symbol after which we have to
3355                          insert does not exist.  */
3356                       lr_error (ldfile, _("\
3357 %s: cannot reorder after %.*s: symbol not known"),
3358                                 "LC_COLLATE", (int) lenmb, startmb);
3359                       collate->cursor = NULL;
3360                       no_error = 0;
3361                     }
3362                 }
3363
3364               lr_ignore_rest (ldfile, no_error);
3365             }
3366           else
3367             /* This must not happen.  */
3368             goto err_label;
3369           break;
3370
3371         case tok_reorder_end:
3372           /* Ignore the rest of the line if we don't need the input of
3373              this line.  */
3374           if (ignore_content)
3375             break;
3376
3377           if (state != 3)
3378             goto err_label;
3379           state = 4;
3380           lr_ignore_rest (ldfile, 1);
3381           break;
3382
3383         case tok_reorder_sections_after:
3384           /* Ignore the rest of the line if we don't need the input of
3385              this line.  */
3386           if (ignore_content)
3387             {
3388               lr_ignore_rest (ldfile, 0);
3389               break;
3390             }
3391
3392           if (state == 1)
3393             {
3394               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3395                         "LC_COLLATE");
3396               state = 2;
3397
3398               /* Handle ellipsis at end of list.  */
3399               if (was_ellipsis != tok_none)
3400                 {
3401                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3402                                    repertoire, result);
3403                   was_ellipsis = tok_none;
3404                 }
3405             }
3406           else if (state == 3)
3407             {
3408               WITH_CUR_LOCALE (error (0, 0, _("\
3409 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3410               state = 4;
3411             }
3412           else if (state != 2 && state != 4)
3413             goto err_label;
3414           state = 5;
3415
3416           /* Get the name of the sections we are adding after.  */
3417           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3418           if (arg->tok == tok_bsymbol)
3419             {
3420               /* Now find a section with this name.  */
3421               struct section_list *runp = collate->sections;
3422
3423               while (runp != NULL)
3424                 {
3425                   if (runp->name != NULL
3426                       && strlen (runp->name) == arg->val.str.lenmb
3427                       && memcmp (runp->name, arg->val.str.startmb,
3428                                  arg->val.str.lenmb) == 0)
3429                     break;
3430
3431                   runp = runp->next;
3432                 }
3433
3434               if (runp != NULL)
3435                 collate->current_section = runp;
3436               else
3437                 {
3438                   /* This is bad.  The section after which we have to
3439                      reorder does not exist.  Therefore we cannot
3440                      process the whole rest of this reorder
3441                      specification.  */
3442                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3443                             "LC_COLLATE", (int) arg->val.str.lenmb,
3444                             arg->val.str.startmb);
3445
3446                   do
3447                     {
3448                       lr_ignore_rest (ldfile, 0);
3449
3450                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3451                     }
3452                   while (now->tok == tok_reorder_sections_after
3453                          || now->tok == tok_reorder_sections_end
3454                          || now->tok == tok_end);
3455
3456                   /* Process the token we just saw.  */
3457                   nowtok = now->tok;
3458                   continue;
3459                 }
3460             }
3461           else
3462             /* This must not happen.  */
3463             goto err_label;
3464           break;
3465
3466         case tok_reorder_sections_end:
3467           /* Ignore the rest of the line if we don't need the input of
3468              this line.  */
3469           if (ignore_content)
3470             break;
3471
3472           if (state != 5)
3473             goto err_label;
3474           state = 6;
3475           lr_ignore_rest (ldfile, 1);
3476           break;
3477
3478         case tok_bsymbol:
3479         case tok_ucs4:
3480           /* Ignore the rest of the line if we don't need the input of
3481              this line.  */
3482           if (ignore_content)
3483             {
3484               lr_ignore_rest (ldfile, 0);
3485               break;
3486             }
3487
3488           if (state != 0 && state != 1 && state != 3 && state != 5)
3489             goto err_label;
3490
3491           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3492             goto err_label;
3493
3494           if (nowtok == tok_ucs4)
3495             {
3496               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3497               symstr = ucs4buf;
3498               symlen = 9;
3499             }
3500           else if (arg != NULL)
3501             {
3502               symstr = arg->val.str.startmb;
3503               symlen = arg->val.str.lenmb;
3504             }
3505           else
3506             {
3507               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3508                         ldfile->token.val.str.lenmb,
3509                         ldfile->token.val.str.startmb);
3510               break;
3511             }
3512
3513           if (state == 0)
3514             {
3515               /* We are outside an `order_start' region.  This means
3516                  we must only accept definitions of values for
3517                  collation symbols since these are purely abstract
3518                  values and don't need directions associated.  */
3519               struct element_t *seqp;
3520
3521               if (find_entry (&collate->seq_table, symstr, symlen,
3522                               (void **) &seqp) == 0)
3523                 {
3524                   /* It's already defined.  First check whether this
3525                      is really a collating symbol.  */
3526                   if (seqp->is_character)
3527                     goto err_label;
3528
3529                   goto move_entry;
3530                 }
3531               else
3532                 {
3533                   void *result;
3534
3535                   if (find_entry (&collate->sym_table, symstr, symlen,
3536                                   &result) != 0)
3537                     /* No collating symbol, it's an error.  */
3538                     goto err_label;
3539
3540                   /* Maybe this is the first time we define a symbol
3541                      value and it is before the first actual section.  */
3542                   if (collate->sections == NULL)
3543                     collate->sections = collate->current_section =
3544                       &collate->symbol_section;
3545                 }
3546
3547               if (was_ellipsis != tok_none)
3548                 {
3549
3550                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3551                                    charmap, repertoire, result);
3552
3553                   /* Remember that we processed the ellipsis.  */
3554                   was_ellipsis = tok_none;
3555
3556                   /* And don't add the value a second time.  */
3557                   break;
3558                 }
3559             }
3560           else if (state == 3)
3561             {
3562               /* It is possible that we already have this collation sequence.
3563                  In this case we move the entry.  */
3564               struct element_t *seqp;
3565               void *sym;
3566
3567               /* If the symbol after which we have to insert was not found
3568                  ignore all entries.  */
3569               if (collate->cursor == NULL)
3570                 {
3571                   lr_ignore_rest (ldfile, 0);
3572                   break;
3573                 }
3574
3575               if (find_entry (&collate->seq_table, symstr, symlen,
3576                               (void **) &seqp) == 0)
3577                 goto move_entry;
3578
3579               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3580                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3581                 goto move_entry;
3582
3583               if (find_entry (&collate->elem_table, symstr, symlen,
3584                               (void **) &seqp) == 0
3585                   && (seqp->last != NULL || seqp->next != NULL
3586                       || (collate->start != NULL && seqp == collate->start)))
3587                 {
3588                 move_entry:
3589                   /* Remove the entry from the old position.  */
3590                   if (seqp->last == NULL)
3591                     collate->start = seqp->next;
3592                   else
3593                     seqp->last->next = seqp->next;
3594                   if (seqp->next != NULL)
3595                     seqp->next->last = seqp->last;
3596
3597                   /* We also have to check whether this entry is the
3598                      first or last of a section.  */
3599                   if (seqp->section->first == seqp)
3600                     {
3601                       if (seqp->section->first == seqp->section->last)
3602                         /* This section has no content anymore.  */
3603                         seqp->section->first = seqp->section->last = NULL;
3604                       else
3605                         seqp->section->first = seqp->next;
3606                     }
3607                   else if (seqp->section->last == seqp)
3608                     seqp->section->last = seqp->last;
3609
3610                   /* Now insert it in the new place.  */
3611                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3612                                   tok_none);
3613                   break;
3614                 }
3615
3616               /* Otherwise we just add a new entry.  */
3617             }
3618           else if (state == 5)
3619             {
3620               /* We are reordering sections.  Find the named section.  */
3621               struct section_list *runp = collate->sections;
3622               struct section_list *prevp = NULL;
3623
3624               while (runp != NULL)
3625                 {
3626                   if (runp->name != NULL
3627                       && strlen (runp->name) == symlen
3628                       && memcmp (runp->name, symstr, symlen) == 0)
3629                     break;
3630
3631                   prevp = runp;
3632                   runp = runp->next;
3633                 }
3634
3635               if (runp == NULL)
3636                 {
3637                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3638                             "LC_COLLATE", (int) symlen, symstr);
3639                   lr_ignore_rest (ldfile, 0);
3640                 }
3641               else
3642                 {
3643                   if (runp != collate->current_section)
3644                     {
3645                       /* Remove the named section from the old place and
3646                          insert it in the new one.  */
3647                       prevp->next = runp->next;
3648
3649                       runp->next = collate->current_section->next;
3650                       collate->current_section->next = runp;
3651                       collate->current_section = runp;
3652                     }
3653
3654                   /* Process the rest of the line which might change
3655                      the collation rules.  */
3656                   arg = lr_token (ldfile, charmap, result, repertoire,
3657                                   verbose);
3658                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3659                     read_directions (ldfile, arg, charmap, repertoire,
3660                                      result);
3661                 }
3662               break;
3663             }
3664           else if (was_ellipsis != tok_none)
3665             {
3666               /* Using the information in the `ellipsis_weight'
3667                  element and this and the last value we have to handle
3668                  the ellipsis now.  */
3669               assert (state == 1);
3670
3671               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3672                                repertoire, result);
3673
3674               /* Remember that we processed the ellipsis.  */
3675               was_ellipsis = tok_none;
3676
3677               /* And don't add the value a second time.  */
3678               break;
3679             }
3680
3681           /* Now insert in the new place.  */
3682           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3683           break;
3684
3685         case tok_undefined:
3686           /* Ignore the rest of the line if we don't need the input of
3687              this line.  */
3688           if (ignore_content)
3689             {
3690               lr_ignore_rest (ldfile, 0);
3691               break;
3692             }
3693
3694           if (state != 1)
3695             goto err_label;
3696
3697           if (was_ellipsis != tok_none)
3698             {
3699               lr_error (ldfile,
3700                         _("%s: cannot have `%s' as end of ellipsis range"),
3701                         "LC_COLLATE", "UNDEFINED");
3702
3703               unlink_element (collate);
3704               was_ellipsis = tok_none;
3705             }
3706
3707           /* See whether UNDEFINED already appeared somewhere.  */
3708           if (collate->undefined.next != NULL
3709               || &collate->undefined == collate->cursor)
3710             {
3711               lr_error (ldfile,
3712                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3713                         "LC_COLLATE", 9, "UNDEFINED",
3714                         collate->undefined.file,
3715                         collate->undefined.line);
3716               lr_ignore_rest (ldfile, 0);
3717             }
3718           else
3719             /* Parse the weights.  */
3720              insert_weights (ldfile, &collate->undefined, charmap,
3721                              repertoire, result, tok_none);
3722           break;
3723
3724         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3725         case tok_ellipsis3: /* absolute ellipsis */
3726         case tok_ellipsis4: /* symbolic decimal ellipsis */
3727           /* This is the symbolic (decimal or hexadecimal) or absolute
3728              ellipsis.  */
3729           if (was_ellipsis != tok_none)
3730             goto err_label;
3731
3732           if (state != 0 && state != 1 && state != 3)
3733             goto err_label;
3734
3735           was_ellipsis = nowtok;
3736
3737           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3738                           repertoire, result, nowtok);
3739           break;
3740
3741         case tok_end:
3742           /* Next we assume `LC_COLLATE'.  */
3743           if (!ignore_content)
3744             {
3745               if (state == 0)
3746                 /* We must either see a copy statement or have
3747                    ordering values.  */
3748                 lr_error (ldfile,
3749                           _("%s: empty category description not allowed"),
3750                           "LC_COLLATE");
3751               else if (state == 1)
3752                 {
3753                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3754                             "LC_COLLATE");
3755
3756                   /* Handle ellipsis at end of list.  */
3757                   if (was_ellipsis != tok_none)
3758                     {
3759                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3760                                        repertoire, result);
3761                       was_ellipsis = tok_none;
3762                     }
3763                 }
3764               else if (state == 3)
3765                 WITH_CUR_LOCALE (error (0, 0, _("\
3766 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3767               else if (state == 5)
3768                 WITH_CUR_LOCALE (error (0, 0, _("\
3769 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3770             }
3771           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3772           if (arg->tok == tok_eof)
3773             break;
3774           if (arg->tok == tok_eol)
3775             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3776           else if (arg->tok != tok_lc_collate)
3777             lr_error (ldfile, _("\
3778 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3779           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3780           return;
3781
3782         default:
3783         err_label:
3784           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3785         }
3786
3787       /* Prepare for the next round.  */
3788       now = lr_token (ldfile, charmap, result, NULL, verbose);
3789       nowtok = now->tok;
3790     }
3791
3792   /* When we come here we reached the end of the file.  */
3793   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3794 }