locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2003, 2005, 2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License version 2 as
   7    published by the Free Software Foundation.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <errno.h>
  23 #include <error.h>
  24 #include <stdlib.h>
  25 #include <wchar.h>
  26 #include <sys/param.h>
  27
  28 #include "localedef.h"
  29 #include "charmap.h"
  30 #include "localeinfo.h"
  31 #include "linereader.h"
  32 #include "locfile.h"
  33 #include "elem-hash.h"
  34
  35 /* Uncomment the following line in the production version.  */
  36 /* #define NDEBUG 1 */
  37 #include <assert.h>
  38
/* Allocation hooks for the obstack macros used throughout this file:
   chunks are obtained with malloc and released with free.  */
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
  41
  42 static inline void
  43 __attribute ((always_inline))
  44 obstack_int32_grow (struct obstack *obstack, int32_t data)
  45 {
  46   if (sizeof (int32_t) == sizeof (int))
  47     obstack_int_grow (obstack, data);
  48   else
  49     obstack_grow (obstack, &data, sizeof (int32_t));
  50 }
  51
  52 static inline void
  53 __attribute ((always_inline))
  54 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  55 {
  56   if (sizeof (int32_t) == sizeof (int))
  57     obstack_int_grow_fast (obstack, data);
  58   else
  59     obstack_grow (obstack, &data, sizeof (int32_t));
  60 }
  61
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section of
   the collation definition.  A node carries two independent successor
   pointers (`def_next' and `next'), one for each of the two lists named
   in the member comments below.  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  read_directions installs
     the array here.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
  83
struct element_t;

/* One weight of an element: a counted sequence of element pointers.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT pointers.  insert_weights stores NULL for an ignored level
     and the ELEMENT_ELLIPSIS2 placeholder for ellipsis weights, so an
     entry is not necessarily a valid element.  */
  struct element_t **w;
};
  93
/* Data type for collating element.  Objects are created by new_element
   and linked into the order list by insert_weights.  */
struct element_t
{
  /* Symbolic name; NULL if the element was created without one.  */
  const char *name;

  /* Multibyte sequence and its length; NULL and 0 if there is none.  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and its length (without
     the terminator); NULL and 0 if there is none.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per sorting level (array of `nrules' entries);
     NULL until insert_weights allocates it.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  NOTE(review): `mblast' is
     presumably the predecessor, by analogy with `last'/`next' above.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  NOTE(review):
     `wclast' is presumably the predecessor, as above.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
 143
/* Special element values.  These small integer constants cast to
   pointers are placeholders and must never be dereferenced.
   insert_weights stores ELEMENT_ELLIPSIS2 in weight lists of lines
   that use an ellipsis.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 148
/* Data type for collating symbol.  Objects are created by new_symbol.  */
struct symbol_t
{
  /* Name of the symbol (a private, NUL-terminated copy).  */
  const char *name;

  /* Point to place in the order list.  NULL until an element has been
     associated with the symbol (see find_element).  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
 161
/* Each inclusion of "3level.h" below is parameterized through the
   macros TABLE, ELEMENT and DEFAULT defined just before it.
   NOTE(review): the exact effect of ITERATE and NO_FINALIZE is
   determined by "3level.h", which is not visible here.  */

/* Sparse table of struct element_t *.  Provides `struct wchead_table',
   the type of the `wcheads' member of struct locale_collate_t.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_FINALIZE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  Provides `struct collseq_table', the type
   of the `wcseqorder' member of struct locale_collate_t.  The default
   value has all bits set.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 181
 182
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  New elements are assigned to
     it (see new_element and insert_weights).  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links each new element directly behind it and then advances it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* All elements, symbols and weight lists of this category are
     allocated from this obstack.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contain information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the element lists for the wide
     character sequences.  NOTE(review): presumably indexed by the
     leading wide character; confirm against the users of `wcheads'.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;
};
 243
 244
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.

   `nrules' is the number of sorting rules (levels).  It is zero until
   read_directions has processed its first specification; that call
   fixes the value and later specifications are checked against it.  */
static uint32_t nrules;
 248
 249
 250 /* We need UTF-8 encoding of numbers.  */
 251 static inline int
 252 __attribute ((always_inline))
 253 utf8_encode (char *buf, int val)
 254 {
 255   int retval;
 256
 257   if (val < 0x80)
 258     {
 259       *buf++ = (char) val;
 260       retval = 1;
 261     }
 262   else
 263     {
 264       int step;
 265
 266       for (step = 2; step < 6; ++step)
 267         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 268           break;
 269       retval = step;
 270
 271       *buf = (unsigned char) (~0xff >> step);
 272       --step;
 273       do
 274         {
 275           buf[step] = 0x80 | (val & 0x3f);
 276           val >>= 6;
 277         }
 278       while (--step > 0);
 279       *buf |= val;
 280     }
 281
 282   return retval;
 283 }
 284
 285
 286 static struct section_list *
 287 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 288                    struct section_list *next)
 289 {
 290   struct section_list *newp;
 291
 292   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 293                                                 sizeof (*newp));
 294   newp->next = next;
 295   newp->name = string;
 296   newp->first = NULL;
 297   newp->last = NULL;
 298
 299   return newp;
 300 }
 301
 302
 303 static struct element_t *
 304 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 305              const uint32_t *wcs, const char *name, size_t namelen,
 306              int is_character)
 307 {
 308   struct element_t *newp;
 309
 310   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 311                                              sizeof (*newp));
 312   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 313                                                     name, namelen);
 314   if (mbs != NULL)
 315     {
 316       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 317       newp->nmbs = mbslen;
 318     }
 319   else
 320     {
 321       newp->mbs = NULL;
 322       newp->nmbs = 0;
 323     }
 324   if (wcs != NULL)
 325     {
 326       size_t nwcs = wcslen ((wchar_t *) wcs);
 327       uint32_t zero = 0;
 328       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 329       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 330       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 331       newp->nwcs = nwcs;
 332     }
 333   else
 334     {
 335       newp->wcs = NULL;
 336       newp->nwcs = 0;
 337     }
 338   newp->mborder = NULL;
 339   newp->wcorder = 0;
 340   newp->used_in_level = 0;
 341   newp->is_character = is_character;
 342
 343   /* Will be assigned later.  XXX  */
 344   newp->mbseqorder = 0;
 345   newp->wcseqorder = 0;
 346
 347   /* Will be allocated later.  */
 348   newp->weights = NULL;
 349
 350   newp->file = NULL;
 351   newp->line = 0;
 352
 353   newp->section = collate->current_section;
 354
 355   newp->last = NULL;
 356   newp->next = NULL;
 357
 358   newp->mbnext = NULL;
 359   newp->mblast = NULL;
 360
 361   newp->wcnext = NULL;
 362   newp->wclast = NULL;
 363
 364   return newp;
 365 }
 366
 367
 368 static struct symbol_t *
 369 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 370 {
 371   struct symbol_t *newp;
 372
 373   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 374
 375   newp->name = obstack_copy0 (&collate->mempool, name, len);
 376   newp->order = NULL;
 377
 378   newp->file = NULL;
 379   newp->line = 0;
 380
 381   return newp;
 382 }
 383
 384
 385 /* Test whether this name is already defined somewhere.  */
 386 static int
 387 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 388                  const struct charmap_t *charmap,
 389                  struct repertoire_t *repertoire, const char *symbol,
 390                  size_t symbol_len)
 391 {
 392   void *ignore = NULL;
 393
 394   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 395     {
 396       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 397                 (int) symbol_len, symbol);
 398       return 1;
 399     }
 400
 401   if (repertoire != NULL
 402       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 403           == 0))
 404     {
 405       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 406                 (int) symbol_len, symbol);
 407       return 1;
 408     }
 409
 410   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 411     {
 412       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 413                 (int) symbol_len, symbol);
 414       return 1;
 415     }
 416
 417   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 418     {
 419       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 420                 (int) symbol_len, symbol);
 421       return 1;
 422     }
 423
 424   return 0;
 425 }
 426
 427
 428 /* Read the direction specification.  */
 429 static void
 430 read_directions (struct linereader *ldfile, struct token *arg,
 431                  const struct charmap_t *charmap,
 432                  struct repertoire_t *repertoire, struct localedef_t *result)
 433 {
 434   int cnt = 0;
 435   int max = nrules ?: 10;
 436   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 437   int warned = 0;
 438   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 439
 440   while (1)
 441     {
 442       int valid = 0;
 443
 444       if (arg->tok == tok_forward)
 445         {
 446           if (rules[cnt] & sort_backward)
 447             {
 448               if (! warned)
 449                 {
 450                   lr_error (ldfile, _("\
 451 %s: `forward' and `backward' are mutually excluding each other"),
 452                             "LC_COLLATE");
 453                   warned = 1;
 454                 }
 455             }
 456           else if (rules[cnt] & sort_forward)
 457             {
 458               if (! warned)
 459                 {
 460                   lr_error (ldfile, _("\
 461 %s: `%s' mentioned more than once in definition of weight %d"),
 462                             "LC_COLLATE", "forward", cnt + 1);
 463                 }
 464             }
 465           else
 466             rules[cnt] |= sort_forward;
 467
 468           valid = 1;
 469         }
 470       else if (arg->tok == tok_backward)
 471         {
 472           if (rules[cnt] & sort_forward)
 473             {
 474               if (! warned)
 475                 {
 476                   lr_error (ldfile, _("\
 477 %s: `forward' and `backward' are mutually excluding each other"),
 478                             "LC_COLLATE");
 479                   warned = 1;
 480                 }
 481             }
 482           else if (rules[cnt] & sort_backward)
 483             {
 484               if (! warned)
 485                 {
 486                   lr_error (ldfile, _("\
 487 %s: `%s' mentioned more than once in definition of weight %d"),
 488                             "LC_COLLATE", "backward", cnt + 1);
 489                 }
 490             }
 491           else
 492             rules[cnt] |= sort_backward;
 493
 494           valid = 1;
 495         }
 496       else if (arg->tok == tok_position)
 497         {
 498           if (rules[cnt] & sort_position)
 499             {
 500               if (! warned)
 501                 {
 502                   lr_error (ldfile, _("\
 503 %s: `%s' mentioned more than once in definition of weight %d"),
 504                             "LC_COLLATE", "position", cnt + 1);
 505                 }
 506             }
 507           else
 508             rules[cnt] |= sort_position;
 509
 510           valid = 1;
 511         }
 512
 513       if (valid)
 514         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 515
 516       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 517           || arg->tok == tok_semicolon)
 518         {
 519           if (! valid && ! warned)
 520             {
 521               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 522               warned = 1;
 523             }
 524
 525           /* See whether we have to increment the counter.  */
 526           if (arg->tok != tok_comma && rules[cnt] != 0)
 527             {
 528               /* Add the default `forward' if we have seen only `position'.  */
 529               if (rules[cnt] == sort_position)
 530                 rules[cnt] = sort_position | sort_forward;
 531
 532               ++cnt;
 533             }
 534
 535           if (arg->tok == tok_eof || arg->tok == tok_eol)
 536             /* End of line or file, so we exit the loop.  */
 537             break;
 538
 539           if (nrules == 0)
 540             {
 541               /* See whether we have enough room in the array.  */
 542               if (cnt == max)
 543                 {
 544                   max += 10;
 545                   rules = (enum coll_sort_rule *) xrealloc (rules,
 546                                                             max
 547                                                             * sizeof (*rules));
 548                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 549                 }
 550             }
 551           else
 552             {
 553               if (cnt == nrules)
 554                 {
 555                   /* There must not be any more rule.  */
 556                   if (! warned)
 557                     {
 558                       lr_error (ldfile, _("\
 559 %s: too many rules; first entry only had %d"),
 560                                 "LC_COLLATE", nrules);
 561                       warned = 1;
 562                     }
 563
 564                   lr_ignore_rest (ldfile, 0);
 565                   break;
 566                 }
 567             }
 568         }
 569       else
 570         {
 571           if (! warned)
 572             {
 573               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 574               warned = 1;
 575             }
 576         }
 577
 578       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 579     }
 580
 581   if (nrules == 0)
 582     {
 583       /* Now we know how many rules we have.  */
 584       nrules = cnt;
 585       rules = (enum coll_sort_rule *) xrealloc (rules,
 586                                                 nrules * sizeof (*rules));
 587     }
 588   else
 589     {
 590       if (cnt < nrules)
 591         {
 592           /* Not enough rules in this specification.  */
 593           if (! warned)
 594             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 595
 596           do
 597             rules[cnt] = sort_forward;
 598           while (++cnt < nrules);
 599         }
 600     }
 601
 602   collate->current_section->rules = rules;
 603 }
 604
 605
 606 static struct element_t *
 607 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 608               const char *str, size_t len)
 609 {
 610   void *result = NULL;
 611
 612   /* Search for the entries among the collation sequences already define.  */
 613   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 614     {
 615       /* Nope, not define yet.  So we see whether it is a
 616          collation symbol.  */
 617       void *ptr;
 618
 619       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 620         {
 621           /* It's a collation symbol.  */
 622           struct symbol_t *sym = (struct symbol_t *) ptr;
 623           result = sym->order;
 624
 625           if (result == NULL)
 626             result = sym->order = new_element (collate, NULL, 0, NULL,
 627                                                NULL, 0, 0);
 628         }
 629       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 630         {
 631           /* It's also no collation element.  So it is a character
 632              element defined later.  */
 633           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 634           /* Insert it into the sequence table.  */
 635           insert_entry (&collate->seq_table, str, len, result);
 636         }
 637     }
 638
 639   return (struct element_t *) result;
 640 }
 641
 642
 643 static void
 644 unlink_element (struct locale_collate_t *collate)
 645 {
 646   if (collate->cursor == collate->start)
 647     {
 648       assert (collate->cursor->next == NULL);
 649       assert (collate->cursor->last == NULL);
 650       collate->cursor = NULL;
 651     }
 652   else
 653     {
 654       if (collate->cursor->next != NULL)
 655         collate->cursor->next->last = collate->cursor->last;
 656       if (collate->cursor->last != NULL)
 657         collate->cursor->last->next = collate->cursor->next;
 658       collate->cursor = collate->cursor->last;
 659     }
 660 }
 661
 662
 663 static void
 664 insert_weights (struct linereader *ldfile, struct element_t *elem,
 665                 const struct charmap_t *charmap,
 666                 struct repertoire_t *repertoire, struct localedef_t *result,
 667                 enum token_t ellipsis)
 668 {
 669   int weight_cnt;
 670   struct token *arg;
 671   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 672
 673   /* Initialize all the fields.  */
 674   elem->file = ldfile->fname;
 675   elem->line = ldfile->lineno;
 676
 677   elem->last = collate->cursor;
 678   elem->next = collate->cursor ? collate->cursor->next : NULL;
 679   if (collate->cursor != NULL && collate->cursor->next != NULL)
 680     collate->cursor->next->last = elem;
 681   if (collate->cursor != NULL)
 682     collate->cursor->next = elem;
 683   if (collate->start == NULL)
 684     {
 685       assert (collate->cursor == NULL);
 686       collate->start = elem;
 687     }
 688
 689   elem->section = collate->current_section;
 690
 691   if (collate->current_section->first == NULL)
 692     collate->current_section->first = elem;
 693   if (collate->current_section->last == collate->cursor)
 694     collate->current_section->last = elem;
 695
 696   collate->cursor = elem;
 697
 698   elem->weights = (struct element_list_t *)
 699     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 700   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 701
 702   weight_cnt = 0;
 703
 704   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 705   do
 706     {
 707       if (arg->tok == tok_eof || arg->tok == tok_eol)
 708         break;
 709
 710       if (arg->tok == tok_ignore)
 711         {
 712           /* The weight for this level has to be ignored.  We use the
 713              null pointer to indicate this.  */
 714           elem->weights[weight_cnt].w = (struct element_t **)
 715             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 716           elem->weights[weight_cnt].w[0] = NULL;
 717           elem->weights[weight_cnt].cnt = 1;
 718         }
 719       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 720         {
 721           char ucs4str[10];
 722           struct element_t *val;
 723           char *symstr;
 724           size_t symlen;
 725
 726           if (arg->tok == tok_bsymbol)
 727             {
 728               symstr = arg->val.str.startmb;
 729               symlen = arg->val.str.lenmb;
 730             }
 731           else
 732             {
 733               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 734               symstr = ucs4str;
 735               symlen = 9;
 736             }
 737
 738           val = find_element (ldfile, collate, symstr, symlen);
 739           if (val == NULL)
 740             break;
 741
 742           elem->weights[weight_cnt].w = (struct element_t **)
 743             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 744           elem->weights[weight_cnt].w[0] = val;
 745           elem->weights[weight_cnt].cnt = 1;
 746         }
 747       else if (arg->tok == tok_string)
 748         {
 749           /* Split the string up in the individual characters and put
 750              the element definitions in the list.  */
 751           const char *cp = arg->val.str.startmb;
 752           int cnt = 0;
 753           struct element_t *charelem;
 754           struct element_t **weights = NULL;
 755           int max = 0;
 756
 757           if (*cp == '\0')
 758             {
 759               lr_error (ldfile, _("%s: empty weight string not allowed"),
 760                         "LC_COLLATE");
 761               lr_ignore_rest (ldfile, 0);
 762               break;
 763             }
 764
 765           do
 766             {
 767               if (*cp == '<')
 768                 {
 769                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 770                      the latter we have to unify the name.  */
 771                   const char *startp = ++cp;
 772                   size_t len;
 773
 774                   while (*cp != '>')
 775                     {
 776                       if (*cp == ldfile->escape_char)
 777                         ++cp;
 778                       if (*cp == '\0')
 779                         /* It's a syntax error.  */
 780                         goto syntax;
 781
 782                       ++cp;
 783                     }
 784
 785                   if (cp - startp == 5 && startp[0] == 'U'
 786                       && isxdigit (startp[1]) && isxdigit (startp[2])
 787                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 788                     {
 789                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 790                       char *newstr;
 791
 792                       newstr = (char *) xmalloc (10);
 793                       snprintf (newstr, 10, "U%08X", ucs4);
 794                       startp = newstr;
 795
 796                       len = 9;
 797                     }
 798                   else
 799                     len = cp - startp;
 800
 801                   charelem = find_element (ldfile, collate, startp, len);
 802                   ++cp;
 803                 }
 804               else
 805                 {
 806                   /* People really shouldn't use characters directly in
 807                      the string.  Especially since it's not really clear
 808                      what this means.  We interpret all characters in the
 809                      string as if that would be bsymbols.  Otherwise we
 810                      would have to match back to bsymbols somehow and this
 811                      is normally not what people normally expect.  */
 812                   charelem = find_element (ldfile, collate, cp++, 1);
 813                 }
 814
 815               if (charelem == NULL)
 816                 {
 817                   /* We ignore the rest of the line.  */
 818                   lr_ignore_rest (ldfile, 0);
 819                   break;
 820                 }
 821
 822               /* Add the pointer.  */
 823               if (cnt >= max)
 824                 {
 825                   struct element_t **newp;
 826                   max += 10;
 827                   newp = (struct element_t **)
 828                     alloca (max * sizeof (struct element_t *));
 829                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 830                   weights = newp;
 831                 }
 832               weights[cnt++] = charelem;
 833             }
 834           while (*cp != '\0');
 835
 836           /* Now store the information.  */
 837           elem->weights[weight_cnt].w = (struct element_t **)
 838             obstack_alloc (&collate->mempool,
 839                            cnt * sizeof (struct element_t *));
 840           memcpy (elem->weights[weight_cnt].w, weights,
 841                   cnt * sizeof (struct element_t *));
 842           elem->weights[weight_cnt].cnt = cnt;
 843
 844           /* We don't need the string anymore.  */
 845           free (arg->val.str.startmb);
 846         }
 847       else if (ellipsis != tok_none
 848                && (arg->tok == tok_ellipsis2
 849                    || arg->tok == tok_ellipsis3
 850                    || arg->tok == tok_ellipsis4))
 851         {
 852           /* It must be the same ellipsis as used in the initial column.  */
 853           if (arg->tok != ellipsis)
 854             lr_error (ldfile, _("\
 855 %s: weights must use the same ellipsis symbol as the name"),
 856                       "LC_COLLATE");
 857
 858           /* The weight for this level will depend on the element
 859              iterating over the range.  Put a placeholder.  */
 860           elem->weights[weight_cnt].w = (struct element_t **)
 861             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 862           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 863           elem->weights[weight_cnt].cnt = 1;
 864         }
 865       else
 866         {
 867         syntax:
 868           /* It's a syntax error.  */
 869           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 870           lr_ignore_rest (ldfile, 0);
 871           break;
 872         }
 873
 874       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 875       /* This better should be the end of the line or a semicolon.  */
 876       if (arg->tok == tok_semicolon)
 877         /* OK, ignore this and read the next token.  */
 878         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 879       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 880         {
 881           /* It's a syntax error.  */
 882           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 883           lr_ignore_rest (ldfile, 0);
 884           break;
 885         }
 886     }
 887   while (++weight_cnt < nrules);
 888
 889   if (weight_cnt < nrules)
 890     {
 891       /* This means the rest of the line uses the current element as
 892          the weight.  */
 893       do
 894         {
 895           elem->weights[weight_cnt].w = (struct element_t **)
 896             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 897           if (ellipsis == tok_none)
 898             elem->weights[weight_cnt].w[0] = elem;
 899           else
 900             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 901           elem->weights[weight_cnt].cnt = 1;
 902         }
 903       while (++weight_cnt < nrules);
 904     }
 905   else
 906     {
 907       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 908         {
 909           /* Too many rule values.  */
 910           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 911           lr_ignore_rest (ldfile, 0);
 912         }
 913       else
 914         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 915     }
 916 }
 917
 918
 919 static int
 920 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 921               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 922               struct localedef_t *result)
 923 {
 924   /* First find out what kind of symbol this is.  */
 925   struct charseq *seq;
 926   uint32_t wc;
 927   struct element_t *elem = NULL;
 928   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 929
 930   /* Try to find the character in the charmap.  */
 931   seq = charmap_find_value (charmap, symstr, symlen);
 932
 933   /* Determine the wide character.  */
 934   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 935     {
 936       wc = repertoire_find_value (repertoire, symstr, symlen);
 937       if (seq != NULL)
 938         seq->ucs4 = wc;
 939     }
 940   else
 941     wc = seq->ucs4;
 942
 943   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 944     {
 945       /* It's no character, so look through the collation elements and
 946          symbol list.  */
 947       void *ptr = elem;
 948       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 949         {
 950           void *result;
 951           struct symbol_t *sym = NULL;
 952
 953           /* It's also collation element.  Therefore it's either a
 954              collating symbol or it's a character which is not
 955              supported by the character set.  In the later case we
 956              simply create a dummy entry.  */
 957           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 958             {
 959               /* It's a collation symbol.  */
 960               sym = (struct symbol_t *) result;
 961
 962               elem = sym->order;
 963             }
 964
 965           if (elem == NULL)
 966             {
 967               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 968
 969               if (sym != NULL)
 970                 sym->order = elem;
 971               else
 972                 /* Enter a fake element in the sequence table.  This
 973                    won't cause anything in the output since there is
 974                    no multibyte or wide character associated with
 975                    it.  */
 976                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 977             }
 978         }
 979       else
 980         /* Copy the result back.  */
 981         elem = ptr;
 982     }
 983   else
 984     {
 985       /* Otherwise the symbols stands for a character.  */
 986       void *ptr = elem;
 987       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
 988         {
 989           uint32_t wcs[2] = { wc, 0 };
 990
 991           /* We have to allocate an entry.  */
 992           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 993                               seq != NULL ? seq->nbytes : 0,
 994                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 995                               symstr, symlen, 1);
 996
 997           /* And add it to the table.  */
 998           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
 999             /* This cannot happen.  */
1000             assert (! "Internal error");
1001         }
1002       else
1003         {
1004           /* Copy the result back.  */
1005           elem = ptr;
1006
1007           /* Maybe the character was used before the definition.  In this case
1008              we have to insert the byte sequences now.  */
1009           if (elem->mbs == NULL && seq != NULL)
1010             {
1011               elem->mbs = obstack_copy0 (&collate->mempool,
1012                                          seq->bytes, seq->nbytes);
1013               elem->nmbs = seq->nbytes;
1014             }
1015
1016           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1017             {
1018               uint32_t wcs[2] = { wc, 0 };
1019
1020               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1021               elem->nwcs = 1;
1022             }
1023         }
1024     }
1025
1026   /* Test whether this element is not already in the list.  */
1027   if (elem->next != NULL || elem == collate->cursor)
1028     {
1029       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1030                 (int) symlen, symstr, elem->file, elem->line);
1031       lr_ignore_rest (ldfile, 0);
1032       return 1;
1033     }
1034
1035   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1036
1037   return 0;
1038 }
1039
1040
1041 static void
1042 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1043                  enum token_t ellipsis, const struct charmap_t *charmap,
1044                  struct repertoire_t *repertoire,
1045                  struct localedef_t *result)
1046 {
1047   struct element_t *startp;
1048   struct element_t *endp;
1049   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1050
1051   /* Unlink the entry added for the ellipsis.  */
1052   unlink_element (collate);
1053   startp = collate->cursor;
1054
1055   /* Process and add the end-entry.  */
1056   if (symstr != NULL
1057       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1058     /* Something went wrong with inserting the to-value.  This means
1059        we cannot process the ellipsis.  */
1060     return;
1061
1062   /* Reset the cursor.  */
1063   collate->cursor = startp;
1064
1065   /* Now we have to handle many different situations:
1066      - we have to distinguish between the three different ellipsis forms
1067      - the is the ellipsis at the beginning, in the middle, or at the end.
1068   */
1069   endp = collate->cursor->next;
1070   assert (symstr == NULL || endp != NULL);
1071
1072   /* XXX The following is probably very wrong since also collating symbols
1073      can appear in ranges.  But do we want/can refine the test for that?  */
1074 #if 0
1075   /* Both, the start and the end symbol, must stand for characters.  */
1076   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1077       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1078     {
1079       lr_error (ldfile, _("\
1080 %s: the start and the end symbol of a range must stand for characters"),
1081                 "LC_COLLATE");
1082       return;
1083     }
1084 #endif
1085
1086   if (ellipsis == tok_ellipsis3)
1087     {
1088       /* One requirement we make here: the length of the byte
1089          sequences for the first and end character must be the same.
1090          This is mainly to prevent unwanted effects and this is often
1091          not what is wanted.  */
1092       size_t len = (startp->mbs != NULL ? startp->nmbs
1093                     : (endp->mbs != NULL ? endp->nmbs : 0));
1094       char mbcnt[len + 1];
1095       char mbend[len + 1];
1096
1097       /* Well, this should be caught somewhere else already.  Just to
1098          make sure.  */
1099       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1100       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1101
1102       if (startp != NULL && endp != NULL
1103           && startp->mbs != NULL && endp->mbs != NULL
1104           && startp->nmbs != endp->nmbs)
1105         {
1106           lr_error (ldfile, _("\
1107 %s: byte sequences of first and last character must have the same length"),
1108                     "LC_COLLATE");
1109           return;
1110         }
1111
1112       /* Determine whether we have to generate multibyte sequences.  */
1113       if ((startp == NULL || startp->mbs != NULL)
1114           && (endp == NULL || endp->mbs != NULL))
1115         {
1116           int cnt;
1117           int ret;
1118
1119           /* Prepare the beginning byte sequence.  This is either from the
1120              beginning byte sequence or it is all nulls if it was an
1121              initial ellipsis.  */
1122           if (startp == NULL || startp->mbs == NULL)
1123             memset (mbcnt, '\0', len);
1124           else
1125             {
1126               memcpy (mbcnt, startp->mbs, len);
1127
1128               /* And increment it so that the value is the first one we will
1129                  try to insert.  */
1130               for (cnt = len - 1; cnt >= 0; --cnt)
1131                 if (++mbcnt[cnt] != '\0')
1132                   break;
1133             }
1134           mbcnt[len] = '\0';
1135
1136           /* And the end sequence.  */
1137           if (endp == NULL || endp->mbs == NULL)
1138             memset (mbend, '\0', len);
1139           else
1140             memcpy (mbend, endp->mbs, len);
1141           mbend[len] = '\0';
1142
1143           /* Test whether we have a correct range.  */
1144           ret = memcmp (mbcnt, mbend, len);
1145           if (ret >= 0)
1146             {
1147               if (ret > 0)
1148                 lr_error (ldfile, _("%s: byte sequence of first character of \
1149 range is not lower than that of the last character"), "LC_COLLATE");
1150               return;
1151             }
1152
1153           /* Generate the byte sequences data.  */
1154           while (1)
1155             {
1156               struct charseq *seq;
1157
1158               /* Quite a bit of work ahead.  We have to find the character
1159                  definition for the byte sequence and then determine the
1160                  wide character belonging to it.  */
1161               seq = charmap_find_symbol (charmap, mbcnt, len);
1162               if (seq != NULL)
1163                 {
1164                   struct element_t *elem;
1165                   size_t namelen;
1166
1167                   /* I don't think this can ever happen.  */
1168                   assert (seq->name != NULL);
1169                   namelen = strlen (seq->name);
1170
1171                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1172                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1173                                                        namelen);
1174
1175                   /* Now we are ready to insert the new value in the
1176                      sequence.  Find out whether the element is
1177                      already known.  */
1178                   void *ptr;
1179                   if (find_entry (&collate->seq_table, seq->name, namelen,
1180                                   &ptr) != 0)
1181                     {
1182                       uint32_t wcs[2] = { seq->ucs4, 0 };
1183
1184                       /* We have to allocate an entry.  */
1185                       elem = new_element (collate, mbcnt, len,
1186                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1187                                           ? NULL : wcs, seq->name,
1188                                           namelen, 1);
1189
1190                       /* And add it to the table.  */
1191                       if (insert_entry (&collate->seq_table, seq->name,
1192                                         namelen, elem) != 0)
1193                         /* This cannot happen.  */
1194                         assert (! "Internal error");
1195                     }
1196                   else
1197                     /* Copy the result.  */
1198                     elem = ptr;
1199
1200                   /* Test whether this element is not already in the list.  */
1201                   if (elem->next != NULL || (collate->cursor != NULL
1202                                              && elem->next == collate->cursor))
1203                     {
1204                       lr_error (ldfile, _("\
1205 order for `%.*s' already defined at %s:%Zu"),
1206                                 (int) namelen, seq->name,
1207                                 elem->file, elem->line);
1208                       goto increment;
1209                     }
1210
1211                   /* Enqueue the new element.  */
1212                   elem->last = collate->cursor;
1213                   if (collate->cursor == NULL)
1214                     elem->next = NULL;
1215                   else
1216                     {
1217                       elem->next = collate->cursor->next;
1218                       elem->last->next = elem;
1219                       if (elem->next != NULL)
1220                         elem->next->last = elem;
1221                     }
1222                   if (collate->start == NULL)
1223                     {
1224                       assert (collate->cursor == NULL);
1225                       collate->start = elem;
1226                     }
1227                   collate->cursor = elem;
1228
1229                  /* Add the weight value.  We take them from the
1230                     `ellipsis_weights' member of `collate'.  */
1231                   elem->weights = (struct element_list_t *)
1232                     obstack_alloc (&collate->mempool,
1233                                    nrules * sizeof (struct element_list_t));
1234                   for (cnt = 0; cnt < nrules; ++cnt)
1235                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1236                         && (collate->ellipsis_weight.weights[cnt].w[0]
1237                             == ELEMENT_ELLIPSIS2))
1238                       {
1239                         elem->weights[cnt].w = (struct element_t **)
1240                           obstack_alloc (&collate->mempool,
1241                                          sizeof (struct element_t *));
1242                         elem->weights[cnt].w[0] = elem;
1243                         elem->weights[cnt].cnt = 1;
1244                       }
1245                     else
1246                       {
1247                         /* Simply use the weight from `ellipsis_weight'.  */
1248                         elem->weights[cnt].w =
1249                           collate->ellipsis_weight.weights[cnt].w;
1250                         elem->weights[cnt].cnt =
1251                           collate->ellipsis_weight.weights[cnt].cnt;
1252                       }
1253                 }
1254
1255               /* Increment for the next round.  */
1256             increment:
1257               for (cnt = len - 1; cnt >= 0; --cnt)
1258                 if (++mbcnt[cnt] != '\0')
1259                   break;
1260
1261               /* Find out whether this was all.  */
1262               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1263                 /* Yep, that's all.  */
1264                 break;
1265             }
1266         }
1267     }
1268   else
1269     {
1270       /* For symbolic range we naturally must have a beginning and an
1271          end specified by the user.  */
1272       if (startp == NULL)
1273         lr_error (ldfile, _("\
1274 %s: symbolic range ellipsis must not directly follow `order_start'"),
1275                   "LC_COLLATE");
1276       else if (endp == NULL)
1277         lr_error (ldfile, _("\
1278 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1279                   "LC_COLLATE");
1280       else
1281         {
1282           /* Determine the range.  To do so we have to determine the
1283              common prefix of the both names and then the numeric
1284              values of both ends.  */
1285           size_t lenfrom = strlen (startp->name);
1286           size_t lento = strlen (endp->name);
1287           char buf[lento + 1];
1288           int preflen = 0;
1289           long int from;
1290           long int to;
1291           char *cp;
1292           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1293
1294           if (lenfrom != lento)
1295             {
1296             invalid_range:
1297               lr_error (ldfile, _("\
1298 `%s' and `%.*s' are not valid names for symbolic range"),
1299                         startp->name, (int) lento, endp->name);
1300               return;
1301             }
1302
1303           while (startp->name[preflen] == endp->name[preflen])
1304             if (startp->name[preflen] == '\0')
1305               /* Nothing to be done.  The start and end point are identical
1306                  and while inserting the end point we have already given
1307                  the user an error message.  */
1308               return;
1309             else
1310               ++preflen;
1311
1312           errno = 0;
1313           from = strtol (startp->name + preflen, &cp, base);
1314           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1315             goto invalid_range;
1316
1317           errno = 0;
1318           to = strtol (endp->name + preflen, &cp, base);
1319           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1320             goto invalid_range;
1321
1322           /* Copy the prefix.  */
1323           memcpy (buf, startp->name, preflen);
1324
1325           /* Loop over all values.  */
1326           for (++from; from < to; ++from)
1327             {
1328               struct element_t *elem = NULL;
1329               struct charseq *seq;
1330               uint32_t wc;
1331               int cnt;
1332
1333               /* Generate the name.  */
1334               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1335                        (int) (lenfrom - preflen), from);
1336
1337               /* Look whether this name is already defined.  */
1338               void *ptr;
1339               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1340                 {
1341                   /* Copy back the result.  */
1342                   elem = ptr;
1343
1344                   if (elem->next != NULL || (collate->cursor != NULL
1345                                              && elem->next == collate->cursor))
1346                     {
1347                       lr_error (ldfile, _("\
1348 %s: order for `%.*s' already defined at %s:%Zu"),
1349                                 "LC_COLLATE", (int) lenfrom, buf,
1350                                 elem->file, elem->line);
1351                       continue;
1352                     }
1353
1354                   if (elem->name == NULL)
1355                     {
1356                       lr_error (ldfile, _("%s: `%s' must be a character"),
1357                                 "LC_COLLATE", buf);
1358                       continue;
1359                     }
1360                 }
1361
1362               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1363                 {
1364                   /* Search for a character of this name.  */
1365                   seq = charmap_find_value (charmap, buf, lenfrom);
1366                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1367                     {
1368                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1369
1370                       if (seq != NULL)
1371                         seq->ucs4 = wc;
1372                     }
1373                   else
1374                     wc = seq->ucs4;
1375
1376                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1377                     /* We don't know anything about a character with this
1378                        name.  XXX Should we warn?  */
1379                     continue;
1380
1381                   if (elem == NULL)
1382                     {
1383                       uint32_t wcs[2] = { wc, 0 };
1384
1385                       /* We have to allocate an entry.  */
1386                       elem = new_element (collate,
1387                                           seq != NULL ? seq->bytes : NULL,
1388                                           seq != NULL ? seq->nbytes : 0,
1389                                           wc == ILLEGAL_CHAR_VALUE
1390                                           ? NULL : wcs, buf, lenfrom, 1);
1391                     }
1392                   else
1393                     {
1394                       /* Update the element.  */
1395                       if (seq != NULL)
1396                         {
1397                           elem->mbs = obstack_copy0 (&collate->mempool,
1398                                                      seq->bytes, seq->nbytes);
1399                           elem->nmbs = seq->nbytes;
1400                         }
1401
1402                       if (wc != ILLEGAL_CHAR_VALUE)
1403                         {
1404                           uint32_t zero = 0;
1405
1406                           obstack_grow (&collate->mempool,
1407                                         &wc, sizeof (uint32_t));
1408                           obstack_grow (&collate->mempool,
1409                                         &zero, sizeof (uint32_t));
1410                           elem->wcs = obstack_finish (&collate->mempool);
1411                           elem->nwcs = 1;
1412                         }
1413                     }
1414
1415                   elem->file = ldfile->fname;
1416                   elem->line = ldfile->lineno;
1417                   elem->section = collate->current_section;
1418                 }
1419
1420               /* Enqueue the new element.  */
1421               elem->last = collate->cursor;
1422               elem->next = collate->cursor->next;
1423               elem->last->next = elem;
1424               if (elem->next != NULL)
1425                 elem->next->last = elem;
1426               collate->cursor = elem;
1427
1428               /* Now add the weights.  They come from the `ellipsis_weights'
1429                  member of `collate'.  */
1430               elem->weights = (struct element_list_t *)
1431                 obstack_alloc (&collate->mempool,
1432                                nrules * sizeof (struct element_list_t));
1433               for (cnt = 0; cnt < nrules; ++cnt)
1434                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1435                     && (collate->ellipsis_weight.weights[cnt].w[0]
1436                         == ELEMENT_ELLIPSIS2))
1437                   {
1438                     elem->weights[cnt].w = (struct element_t **)
1439                       obstack_alloc (&collate->mempool,
1440                                      sizeof (struct element_t *));
1441                     elem->weights[cnt].w[0] = elem;
1442                     elem->weights[cnt].cnt = 1;
1443                   }
1444                 else
1445                   {
1446                     /* Simly use the weight from `ellipsis_weight'.  */
1447                     elem->weights[cnt].w =
1448                       collate->ellipsis_weight.weights[cnt].w;
1449                     elem->weights[cnt].cnt =
1450                       collate->ellipsis_weight.weights[cnt].cnt;
1451                   }
1452             }
1453         }
1454     }
1455 }
1456
1457
1458 static void
1459 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1460                  struct localedef_t *copy_locale, int ignore_content)
1461 {
1462   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1463     {
1464       struct locale_collate_t *collate;
1465
1466       if (copy_locale == NULL)
1467         {
1468           collate = locale->categories[LC_COLLATE].collate =
1469             (struct locale_collate_t *)
1470             xcalloc (1, sizeof (struct locale_collate_t));
1471
1472           /* Init the various data structures.  */
1473           init_hash (&collate->elem_table, 100);
1474           init_hash (&collate->sym_table, 100);
1475           init_hash (&collate->seq_table, 500);
1476           obstack_init (&collate->mempool);
1477
1478           collate->col_weight_max = -1;
1479         }
1480       else
1481         /* Reuse the copy_locale's data structures.  */
1482         collate = locale->categories[LC_COLLATE].collate =
1483           copy_locale->categories[LC_COLLATE].collate;
1484     }
1485
1486   ldfile->translate_strings = 0;
1487   ldfile->return_widestr = 0;
1488 }
1489
1490
1491 void
1492 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1493 {
1494   /* Now is the time when we can assign the individual collation
1495      values for all the symbols.  We have possibly different values
1496      for the wide- and the multibyte-character symbols.  This is done
1497      since it might make a difference in the encoding if there is in
1498      some cases no multibyte-character but there are wide-characters.
1499      (The other way around it is not important since theencoded
1500      collation value in the wide-character case is 32 bits wide and
1501      therefore requires no encoding).
1502
1503      The lowest collation value assigned is 2.  Zero is reserved for
1504      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1505      functions and 1 is used to separate the individual passes for the
1506      different rules.
1507
1508      We also have to construct is list with all the bytes/words which
1509      can come first in a sequence, followed by all the elements which
1510      also start with this byte/word.  The order is reverse which has
1511      among others the important effect that longer strings are located
1512      first in the list.  This is required for the output data since
1513      the algorithm used in `strcoll' etc depends on this.
1514
1515      The multibyte case is easy.  We simply sort into an array with
1516      256 elements.  */
1517   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1518   int mbact[nrules];
1519   int wcact;
1520   int mbseqact;
1521   int wcseqact;
1522   struct element_t *runp;
1523   int i;
1524   int need_undefined = 0;
1525   struct section_list *sect;
1526   int ruleidx;
1527   int nr_wide_elems = 0;
1528
1529   if (collate == NULL)
1530     {
1531       /* No data, no check.  */
1532       if (! be_quiet)
1533         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1534                                 "LC_COLLATE"));
1535       return;
1536     }
1537
1538   /* If this assertion is hit change the type in `element_t'.  */
1539   assert (nrules <= sizeof (runp->used_in_level) * 8);
1540
1541   /* Make sure that the `position' rule is used either in all sections
1542      or in none.  */
1543   for (i = 0; i < nrules; ++i)
1544     for (sect = collate->sections; sect != NULL; sect = sect->next)
1545       if (sect->rules != NULL
1546           && ((sect->rules[i] & sort_position)
1547               != (collate->sections->rules[i] & sort_position)))
1548         {
1549           WITH_CUR_LOCALE (error (0, 0, _("\
1550 %s: `position' must be used for a specific level in all sections or none"),
1551                                   "LC_COLLATE"));
1552           break;
1553         }
1554
1555   /* Find out which elements are used at which level.  At the same
1556      time we find out whether we have any undefined symbols.  */
1557   runp = collate->start;
1558   while (runp != NULL)
1559     {
1560       if (runp->mbs != NULL)
1561         {
1562           for (i = 0; i < nrules; ++i)
1563             {
1564               int j;
1565
1566               for (j = 0; j < runp->weights[i].cnt; ++j)
1567                 /* A NULL pointer as the weight means IGNORE.  */
1568                 if (runp->weights[i].w[j] != NULL)
1569                   {
1570                     if (runp->weights[i].w[j]->weights == NULL)
1571                       {
1572                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1573                                                         runp->line,
1574                                                         _("symbol `%s' not defined"),
1575                                                         runp->weights[i].w[j]->name));
1576
1577                         need_undefined = 1;
1578                         runp->weights[i].w[j] = &collate->undefined;
1579                       }
1580                     else
1581                       /* Set the bit for the level.  */
1582                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1583                   }
1584             }
1585         }
1586
1587       /* Up to the next entry.  */
1588       runp = runp->next;
1589     }
1590
1591   /* Walk through the list of defined sequences and assign weights.  Also
1592      create the data structure which will allow generating the single byte
1593      character based tables.
1594
1595      Since at each time only the weights for each of the rules are
1596      only compared to other weights for this rule it is possible to
1597      assign more compact weight values than simply counting all
1598      weights in sequence.  We can assign weights from 3, one for each
1599      rule individually and only for those elements, which are actually
1600      used for this rule.
1601
1602      Why is this important?  It is not for the wide char table.  But
1603      it is for the singlebyte output since here larger numbers have to
1604      be encoded to make it possible to emit the value as a byte
1605      string.  */
1606   for (i = 0; i < nrules; ++i)
1607     mbact[i] = 2;
1608   wcact = 2;
1609   mbseqact = 0;
1610   wcseqact = 0;
1611   runp = collate->start;
1612   while (runp != NULL)
1613     {
1614       /* Determine the order.  */
1615       if (runp->used_in_level != 0)
1616         {
1617           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1618                                                  nrules * sizeof (int));
1619
1620           for (i = 0; i < nrules; ++i)
1621             if ((runp->used_in_level & (1 << i)) != 0)
1622               runp->mborder[i] = mbact[i]++;
1623             else
1624               runp->mborder[i] = 0;
1625         }
1626
1627       if (runp->mbs != NULL)
1628         {
1629           struct element_t **eptr;
1630           struct element_t *lastp = NULL;
1631
1632           /* Find the point where to insert in the list.  */
1633           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1634           while (*eptr != NULL)
1635             {
1636               if ((*eptr)->nmbs < runp->nmbs)
1637                 break;
1638
1639               if ((*eptr)->nmbs == runp->nmbs)
1640                 {
1641                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1642
1643                   if (c == 0)
1644                     {
1645                       /* This should not happen.  It means that we have
1646                          to symbols with the same byte sequence.  It is
1647                          of course an error.  */
1648                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1649                                                       (*eptr)->line,
1650                                                       _("\
1651 symbol `%s' has the same encoding as"), (*eptr)->name);
1652                                        error_at_line (0, 0, runp->file,
1653                                                       runp->line,
1654                                                       _("symbol `%s'"),
1655                                                       runp->name));
1656                       goto dont_insert;
1657                     }
1658                   else if (c < 0)
1659                     /* Insert it here.  */
1660                     break;
1661                 }
1662
1663               /* To the next entry.  */
1664               lastp = *eptr;
1665               eptr = &(*eptr)->mbnext;
1666             }
1667
1668           /* Set the pointers.  */
1669           runp->mbnext = *eptr;
1670           runp->mblast = lastp;
1671           if (*eptr != NULL)
1672             (*eptr)->mblast = runp;
1673           *eptr = runp;
1674         dont_insert:
1675           ;
1676         }
1677
1678       if (runp->used_in_level)
1679         {
1680           runp->wcorder = wcact++;
1681
1682           /* We take the opportunity to count the elements which have
1683              wide characters.  */
1684           ++nr_wide_elems;
1685         }
1686
1687       if (runp->is_character)
1688         {
1689           if (runp->nmbs == 1)
1690             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1691
1692           runp->wcseqorder = wcseqact++;
1693         }
1694       else if (runp->mbs != NULL && runp->weights != NULL)
1695         /* This is for collation elements.  */
1696         runp->wcseqorder = wcseqact++;
1697
1698       /* Up to the next entry.  */
1699       runp = runp->next;
1700     }
1701
1702   /* Find out whether any of the `mbheads' entries is unset.  In this
1703      case we use the UNDEFINED entry.  */
1704   for (i = 1; i < 256; ++i)
1705     if (collate->mbheads[i] == NULL)
1706       {
1707         need_undefined = 1;
1708         collate->mbheads[i] = &collate->undefined;
1709       }
1710
1711   /* Now to the wide character case.  */
1712   collate->wcheads.p = 6;
1713   collate->wcheads.q = 10;
1714   wchead_table_init (&collate->wcheads);
1715
1716   collate->wcseqorder.p = 6;
1717   collate->wcseqorder.q = 10;
1718   collseq_table_init (&collate->wcseqorder);
1719
1720   /* Start adding.  */
1721   runp = collate->start;
1722   while (runp != NULL)
1723     {
1724       if (runp->wcs != NULL)
1725         {
1726           struct element_t *e;
1727           struct element_t **eptr;
1728           struct element_t *lastp;
1729
1730           /* Insert the collation sequence value.  */
1731           if (runp->is_character)
1732             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1733                                runp->wcseqorder);
1734
1735           /* Find the point where to insert in the list.  */
1736           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1737           eptr = &e;
1738           lastp = NULL;
1739           while (*eptr != NULL)
1740             {
1741               if ((*eptr)->nwcs < runp->nwcs)
1742                 break;
1743
1744               if ((*eptr)->nwcs == runp->nwcs)
1745                 {
1746                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1747                                    (wchar_t *) runp->wcs, runp->nwcs);
1748
1749                   if (c == 0)
1750                     {
1751                       /* This should not happen.  It means that we have
1752                          two symbols with the same byte sequence.  It is
1753                          of course an error.  */
1754                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1755                                                       (*eptr)->line,
1756                                                       _("\
1757 symbol `%s' has the same encoding as"), (*eptr)->name);
1758                                        error_at_line (0, 0, runp->file,
1759                                                       runp->line,
1760                                                       _("symbol `%s'"),
1761                                                       runp->name));
1762                       goto dont_insertwc;
1763                     }
1764                   else if (c < 0)
1765                     /* Insert it here.  */
1766                     break;
1767                 }
1768
1769               /* To the next entry.  */
1770               lastp = *eptr;
1771               eptr = &(*eptr)->wcnext;
1772             }
1773
1774           /* Set the pointers.  */
1775           runp->wcnext = *eptr;
1776           runp->wclast = lastp;
1777           if (*eptr != NULL)
1778             (*eptr)->wclast = runp;
1779           *eptr = runp;
1780           if (eptr == &e)
1781             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1782         dont_insertwc:
1783           ;
1784         }
1785
1786       /* Up to the next entry.  */
1787       runp = runp->next;
1788     }
1789
1790   collseq_table_finalize (&collate->wcseqorder);
1791
1792   /* Now determine whether the UNDEFINED entry is needed and if yes,
1793      whether it was defined.  */
1794   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1795   if (collate->undefined.file == NULL)
1796     {
1797       if (need_undefined)
1798         {
1799           /* This seems not to be enforced by recent standards.  Don't
1800              emit an error, simply append UNDEFINED at the end.  */
1801           if (0)
1802             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1803
1804           /* Add UNDEFINED at the end.  */
1805           collate->undefined.mborder =
1806             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1807
1808           for (i = 0; i < nrules; ++i)
1809             collate->undefined.mborder[i] = mbact[i]++;
1810         }
1811
1812       /* In any case we will need the definition for the wide character
1813          case.  But we will not complain that it is missing since the
1814          specification strangely enough does not seem to account for
1815          this.  */
1816       collate->undefined.wcorder = wcact++;
1817     }
1818
1819   /* Finally, try to unify the rules for the sections.  Whenever the rules
1820      for a section are the same as those for another section give the
1821      ruleset the same index.  Since there are never many section we can
1822      use an O(n^2) algorithm here.  */
1823   sect = collate->sections;
1824   while (sect != NULL && sect->rules == NULL)
1825     sect = sect->next;
1826
1827   /* Bail out if we have no sections because of earlier errors.  */
1828   if (sect == NULL)
1829     {
1830       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1831                               _("too many errors; giving up")));
1832       return;
1833     }
1834
1835   ruleidx = 0;
1836   do
1837     {
1838       struct section_list *osect = collate->sections;
1839
1840       while (osect != sect)
1841         if (osect->rules != NULL
1842             && memcmp (osect->rules, sect->rules, nrules) == 0)
1843           break;
1844         else
1845           osect = osect->next;
1846
1847       if (osect == sect)
1848         sect->ruleidx = ruleidx++;
1849       else
1850         sect->ruleidx = osect->ruleidx;
1851
1852       /* Next section.  */
1853       do
1854         sect = sect->next;
1855       while (sect != NULL && sect->rules == NULL);
1856     }
1857   while (sect != NULL);
1858   /* We are currently not prepared for more than 128 rulesets.  But this
1859      should never really be a problem.  */
1860   assert (ruleidx <= 128);
1861 }
1862
1863
1864 static int32_t
1865 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1866                struct element_t *elem)
1867 {
1868   size_t cnt;
1869   int32_t retval;
1870
1871   /* Optimize the use of UNDEFINED.  */
1872   if (elem == &collate->undefined)
1873     /* The weights are already inserted.  */
1874     return 0;
1875
1876   /* This byte can start exactly one collation element and this is
1877      a single byte.  We can directly give the index to the weights.  */
1878   retval = obstack_object_size (pool);
1879
1880   /* Construct the weight.  */
1881   for (cnt = 0; cnt < nrules; ++cnt)
1882     {
1883       char buf[elem->weights[cnt].cnt * 7];
1884       int len = 0;
1885       int i;
1886
1887       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1888         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1889         if (elem->weights[cnt].w[i] != NULL)
1890           len += utf8_encode (&buf[len],
1891                               elem->weights[cnt].w[i]->mborder[cnt]);
1892
1893       /* And add the buffer content.  */
1894       obstack_1grow (pool, len);
1895       obstack_grow (pool, buf, len);
1896     }
1897
1898   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1899 }
1900
1901
1902 static int32_t
1903 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1904                  struct element_t *elem)
1905 {
1906   size_t cnt;
1907   int32_t retval;
1908
1909   /* Optimize the use of UNDEFINED.  */
1910   if (elem == &collate->undefined)
1911     /* The weights are already inserted.  */
1912     return 0;
1913
1914   /* This byte can start exactly one collation element and this is
1915      a single byte.  We can directly give the index to the weights.  */
1916   retval = obstack_object_size (pool) / sizeof (int32_t);
1917
1918   /* Construct the weight.  */
1919   for (cnt = 0; cnt < nrules; ++cnt)
1920     {
1921       int32_t buf[elem->weights[cnt].cnt];
1922       int i;
1923       int32_t j;
1924
1925       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1926         if (elem->weights[cnt].w[i] != NULL)
1927           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1928
1929       /* And add the buffer content.  */
1930       obstack_int32_grow (pool, j);
1931
1932       obstack_grow (pool, buf, j * sizeof (int32_t));
1933     }
1934
1935   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1936 }
1937
1938
1939 void
1940 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
1941                 const char *output_path)
1942 {
1943   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1944   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1945   struct iovec iov[2 + nelems];
1946   struct locale_file data;
1947   uint32_t idx[nelems];
1948   size_t cnt;
1949   size_t ch;
1950   int32_t tablemb[256];
1951   struct obstack weightpool;
1952   struct obstack extrapool;
1953   struct obstack indirectpool;
1954   struct section_list *sect;
1955   struct collidx_table tablewc;
1956   uint32_t elem_size;
1957   uint32_t *elem_table;
1958   int i;
1959   struct element_t *runp;
1960
1961   data.magic = LIMAGIC (LC_COLLATE);
1962   data.n = nelems;
1963   iov[0].iov_base = (void *) &data;
1964   iov[0].iov_len = sizeof (data);
1965
1966   iov[1].iov_base = (void *) idx;
1967   iov[1].iov_len = sizeof (idx);
1968
1969   idx[0] = iov[0].iov_len + iov[1].iov_len;
1970   cnt = 0;
1971
1972   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1973   iov[2 + cnt].iov_base = &nrules;
1974   iov[2 + cnt].iov_len = sizeof (uint32_t);
1975   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1976   ++cnt;
1977
1978   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1979   if (collate == NULL)
1980     {
1981       int32_t dummy = 0;
1982
1983       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1984         {
1985           /* The words have to be handled specially.  */
1986           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1987             {
1988               iov[2 + cnt].iov_base = &dummy;
1989               iov[2 + cnt].iov_len = sizeof (int32_t);
1990             }
1991           else
1992             {
1993               iov[2 + cnt].iov_base = NULL;
1994               iov[2 + cnt].iov_len = 0;
1995             }
1996
1997           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1998             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1999           ++cnt;
2000         }
2001
2002       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2003
2004       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2005
2006       return;
2007     }
2008
2009   obstack_init (&weightpool);
2010   obstack_init (&extrapool);
2011   obstack_init (&indirectpool);
2012
2013   /* Since we are using the sign of an integer to mark indirection the
2014      offsets in the arrays we are indirectly referring to must not be
2015      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2016   obstack_int32_grow (&extrapool, 0);
2017   obstack_int32_grow (&indirectpool, 0);
2018
2019   /* Prepare the ruleset table.  */
2020   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2021     if (sect->rules != NULL && sect->ruleidx == i)
2022       {
2023         int j;
2024
2025         obstack_make_room (&weightpool, nrules);
2026
2027         for (j = 0; j < nrules; ++j)
2028           obstack_1grow_fast (&weightpool, sect->rules[j]);
2029         ++i;
2030       }
2031   /* And align the output.  */
2032   i = (nrules * i) % __alignof__ (int32_t);
2033   if (i > 0)
2034     do
2035       obstack_1grow (&weightpool, '\0');
2036     while (++i < __alignof__ (int32_t));
2037
2038   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2039   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2040   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2041   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2042   ++cnt;
2043
2044   /* Generate the 8-bit table.  Walk through the lists of sequences
2045      starting with the same byte and add them one after the other to
2046      the table.  In case we have more than one sequence starting with
2047      the same byte we have to use extra indirection.
2048
2049      First add a record for the NUL byte.  This entry will never be used
2050      so it does not matter.  */
2051   tablemb[0] = 0;
2052
2053   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2054      will probably be used more than once it is good to store the
2055      weights only once.  */
2056   if (collate->undefined.used_in_level != 0)
2057     output_weight (&weightpool, collate, &collate->undefined);
2058
2059   for (ch = 1; ch < 256; ++ch)
2060     if (collate->mbheads[ch]->mbnext == NULL
2061         && collate->mbheads[ch]->nmbs <= 1)
2062       {
2063         tablemb[ch] = output_weight (&weightpool, collate,
2064                                      collate->mbheads[ch]);
2065       }
2066     else
2067       {
2068         /* The entries in the list are sorted by length and then
2069            alphabetically.  This is the order in which we will add the
2070            elements to the collation table.  This allows simply walking
2071            the table in sequence and stopping at the first matching
2072            entry.  Since the longer sequences are coming first in the
2073            list they have the possibility to match first, just as it
2074            has to be.  In the worst case we are walking to the end of
2075            the list where we put, if no singlebyte sequence is defined
2076            in the locale definition, the weights for UNDEFINED.
2077
2078            To reduce the length of the search list we compress them a bit.
2079            This happens by collecting sequences of consecutive byte
2080            sequences in one entry (having and begin and end byte sequence)
2081            and add only one index into the weight table.  We can find the
2082            consecutive entries since they are also consecutive in the list.  */
2083         struct element_t *runp = collate->mbheads[ch];
2084         struct element_t *lastp;
2085
2086         assert ((obstack_object_size (&extrapool)
2087                  & (__alignof__ (int32_t) - 1)) == 0);
2088
2089         tablemb[ch] = -obstack_object_size (&extrapool);
2090
2091         do
2092           {
2093             /* Store the current index in the weight table.  We know that
2094                the current position in the `extrapool' is aligned on a
2095                32-bit address.  */
2096             int32_t weightidx;
2097             int added;
2098
2099             /* Find out wether this is a single entry or we have more than
2100                one consecutive entry.  */
2101             if (runp->mbnext != NULL
2102                 && runp->nmbs == runp->mbnext->nmbs
2103                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2104                 && (runp->mbs[runp->nmbs - 1]
2105                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2106               {
2107                 int i;
2108                 struct element_t *series_startp = runp;
2109                 struct element_t *curp;
2110
2111                 /* Compute how much space we will need.  */
2112                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2113                           + __alignof__ (int32_t) - 1)
2114                          & ~(__alignof__ (int32_t) - 1));
2115                 assert ((obstack_object_size (&extrapool)
2116                          & (__alignof__ (int32_t) - 1)) == 0);
2117                 obstack_make_room (&extrapool, added);
2118
2119                 /* More than one consecutive entry.  We mark this by having
2120                    a negative index into the indirect table.  */
2121                 obstack_int32_grow_fast (&extrapool,
2122                                          -(obstack_object_size (&indirectpool)
2123                                            / sizeof (int32_t)));
2124
2125                 /* Now search first the end of the series.  */
2126                 do
2127                   runp = runp->mbnext;
2128                 while (runp->mbnext != NULL
2129                        && runp->nmbs == runp->mbnext->nmbs
2130                        && memcmp (runp->mbs, runp->mbnext->mbs,
2131                                   runp->nmbs - 1) == 0
2132                        && (runp->mbs[runp->nmbs - 1]
2133                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2134
2135                 /* Now walk backward from here to the beginning.  */
2136                 curp = runp;
2137
2138                 assert (runp->nmbs <= 256);
2139                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2140                 for (i = 1; i < curp->nmbs; ++i)
2141                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2142
2143                 /* Now find the end of the consecutive sequence and
2144                    add all the indeces in the indirect pool.  */
2145                 do
2146                   {
2147                     weightidx = output_weight (&weightpool, collate, curp);
2148                     obstack_int32_grow (&indirectpool, weightidx);
2149
2150                     curp = curp->mblast;
2151                   }
2152                 while (curp != series_startp);
2153
2154                 /* Add the final weight.  */
2155                 weightidx = output_weight (&weightpool, collate, curp);
2156                 obstack_int32_grow (&indirectpool, weightidx);
2157
2158                 /* And add the end byte sequence.  Without length this
2159                    time.  */
2160                 for (i = 1; i < curp->nmbs; ++i)
2161                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2162               }
2163             else
2164               {
2165                 /* A single entry.  Simply add the index and the length and
2166                    string (except for the first character which is already
2167                    tested for).  */
2168                 int i;
2169
2170                 /* Output the weight info.  */
2171                 weightidx = output_weight (&weightpool, collate, runp);
2172
2173                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2174                           + __alignof__ (int32_t) - 1)
2175                          & ~(__alignof__ (int32_t) - 1));
2176                 assert ((obstack_object_size (&extrapool)
2177                          & (__alignof__ (int32_t) - 1)) == 0);
2178                 obstack_make_room (&extrapool, added);
2179
2180                 obstack_int32_grow_fast (&extrapool, weightidx);
2181                 assert (runp->nmbs <= 256);
2182                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2183
2184                 for (i = 1; i < runp->nmbs; ++i)
2185                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2186               }
2187
2188             /* Add alignment bytes if necessary.  */
2189             while ((obstack_object_size (&extrapool)
2190                     & (__alignof__ (int32_t) - 1)) != 0)
2191               obstack_1grow_fast (&extrapool, '\0');
2192
2193             /* Next entry.  */
2194             lastp = runp;
2195             runp = runp->mbnext;
2196           }
2197         while (runp != NULL);
2198
2199         assert ((obstack_object_size (&extrapool)
2200                  & (__alignof__ (int32_t) - 1)) == 0);
2201
2202         /* If the final entry in the list is not a single character we
2203            add an UNDEFINED entry here.  */
2204         if (lastp->nmbs != 1)
2205           {
2206             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2207                          & ~(__alignof__ (int32_t) - 1));
2208             obstack_make_room (&extrapool, added);
2209
2210             obstack_int32_grow_fast (&extrapool, 0);
2211             /* XXX What rule? We just pick the first.  */
2212             obstack_1grow_fast (&extrapool, 0);
2213             /* Length is zero.  */
2214             obstack_1grow_fast (&extrapool, 0);
2215
2216             /* Add alignment bytes if necessary.  */
2217             while ((obstack_object_size (&extrapool)
2218                     & (__alignof__ (int32_t) - 1)) != 0)
2219               obstack_1grow_fast (&extrapool, '\0');
2220           }
2221       }
2222
2223   /* Add padding to the tables if necessary.  */
2224   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2225          != 0)
2226     obstack_1grow (&weightpool, 0);
2227
2228   /* Now add the four tables.  */
2229   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2230   iov[2 + cnt].iov_base = tablemb;
2231   iov[2 + cnt].iov_len = sizeof (tablemb);
2232   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2233   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2234   ++cnt;
2235
2236   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2237   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2238   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2239   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2240   ++cnt;
2241
2242   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2243   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2244   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2245   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2246   ++cnt;
2247
2248   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2249   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2250   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2251   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2252   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2253   ++cnt;
2254
2255
2256   /* Now the same for the wide character table.  We need to store some
2257      more information here.  */
2258   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2259   iov[2 + cnt].iov_base = NULL;
2260   iov[2 + cnt].iov_len = 0;
2261   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2262   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2263   ++cnt;
2264
2265   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2266   iov[2 + cnt].iov_base = NULL;
2267   iov[2 + cnt].iov_len = 0;
2268   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2269   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2270   ++cnt;
2271
2272   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2273   iov[2 + cnt].iov_base = NULL;
2274   iov[2 + cnt].iov_len = 0;
2275   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2276   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2277   ++cnt;
2278
2279   /* Since we are using the sign of an integer to mark indirection the
2280      offsets in the arrays we are indirectly referring to must not be
2281      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2282   obstack_int32_grow (&extrapool, 0);
2283   obstack_int32_grow (&indirectpool, 0);
2284
2285   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2286      will probably be used more than once it is good to store the
2287      weights only once.  */
2288   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2289     abort ();
2290
2291   /* Generate the table.  Walk through the lists of sequences starting
2292      with the same wide character and add them one after the other to
2293      the table.  In case we have more than one sequence starting with
2294      the same byte we have to use extra indirection.  */
2295   {
2296     auto void add_to_tablewc (uint32_t ch, struct element_t *runp);
2297
2298     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2299       {
2300         if (runp->wcnext == NULL && runp->nwcs == 1)
2301           {
2302             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2303             collidx_table_add (&tablewc, ch, weigthidx);
2304           }
2305         else
2306           {
2307             /* As for the singlebyte table, we recognize sequences and
2308                compress them.  */
2309             struct element_t *lastp;
2310
2311             collidx_table_add (&tablewc, ch,
2312                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2313
2314             do
2315               {
2316                 /* Store the current index in the weight table.  We know that
2317                    the current position in the `extrapool' is aligned on a
2318                    32-bit address.  */
2319                 int32_t weightidx;
2320                 int added;
2321
2322                 /* Find out wether this is a single entry or we have more than
2323                    one consecutive entry.  */
2324                 if (runp->wcnext != NULL
2325                     && runp->nwcs == runp->wcnext->nwcs
2326                     && wmemcmp ((wchar_t *) runp->wcs,
2327                                 (wchar_t *)runp->wcnext->wcs,
2328                                 runp->nwcs - 1) == 0
2329                     && (runp->wcs[runp->nwcs - 1]
2330                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2331                   {
2332                     int i;
2333                     struct element_t *series_startp = runp;
2334                     struct element_t *curp;
2335
2336                     /* Now add first the initial byte sequence.  */
2337                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2338                     if (sizeof (int32_t) == sizeof (int))
2339                       obstack_make_room (&extrapool, added);
2340
2341                     /* More than one consecutive entry.  We mark this by having
2342                        a negative index into the indirect table.  */
2343                     obstack_int32_grow_fast (&extrapool,
2344                                              -(obstack_object_size (&indirectpool)
2345                                                / sizeof (int32_t)));
2346                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2347
2348                     do
2349                       runp = runp->wcnext;
2350                     while (runp->wcnext != NULL
2351                            && runp->nwcs == runp->wcnext->nwcs
2352                            && wmemcmp ((wchar_t *) runp->wcs,
2353                                        (wchar_t *)runp->wcnext->wcs,
2354                                        runp->nwcs - 1) == 0
2355                            && (runp->wcs[runp->nwcs - 1]
2356                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2357
2358                     /* Now walk backward from here to the beginning.  */
2359                     curp = runp;
2360
2361                     for (i = 1; i < runp->nwcs; ++i)
2362                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2363
2364                     /* Now find the end of the consecutive sequence and
2365                        add all the indeces in the indirect pool.  */
2366                     do
2367                       {
2368                         weightidx = output_weightwc (&weightpool, collate,
2369                                                      curp);
2370                         obstack_int32_grow (&indirectpool, weightidx);
2371
2372                         curp = curp->wclast;
2373                       }
2374                     while (curp != series_startp);
2375
2376                     /* Add the final weight.  */
2377                     weightidx = output_weightwc (&weightpool, collate, curp);
2378                     obstack_int32_grow (&indirectpool, weightidx);
2379
2380                     /* And add the end byte sequence.  Without length this
2381                        time.  */
2382                     for (i = 1; i < curp->nwcs; ++i)
2383                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2384                   }
2385                 else
2386                   {
2387                     /* A single entry.  Simply add the index and the length and
2388                        string (except for the first character which is already
2389                        tested for).  */
2390                     int i;
2391
2392                     /* Output the weight info.  */
2393                     weightidx = output_weightwc (&weightpool, collate, runp);
2394
2395                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2396                     if (sizeof (int) == sizeof (int32_t))
2397                       obstack_make_room (&extrapool, added);
2398
2399                     obstack_int32_grow_fast (&extrapool, weightidx);
2400                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2401                     for (i = 1; i < runp->nwcs; ++i)
2402                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2403                   }
2404
2405                 /* Next entry.  */
2406                 lastp = runp;
2407                 runp = runp->wcnext;
2408               }
2409             while (runp != NULL);
2410           }
2411       }
2412
2413     tablewc.p = 6;
2414     tablewc.q = 10;
2415     collidx_table_init (&tablewc);
2416
2417     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2418
2419     collidx_table_finalize (&tablewc);
2420   }
2421
2422   /* Now add the four tables.  */
2423   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2424   iov[2 + cnt].iov_base = tablewc.result;
2425   iov[2 + cnt].iov_len = tablewc.result_size;
2426   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2427   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2428   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2429   ++cnt;
2430
2431   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2432   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2433   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2434   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2435   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2436   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2437   ++cnt;
2438
2439   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2440   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2441   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2442   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2443   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2444   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2445   ++cnt;
2446
2447   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2448   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2449   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2450   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2451   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2452   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2453   ++cnt;
2454
2455
2456   /* Finally write the table with collation element names out.  It is
2457      a hash table with a simple function which gets the name of the
2458      character as the input.  One character might have many names.  The
2459      value associated with the name is an index into the weight table
2460      where we are then interested in the first-level weight value.
2461
2462      To determine how large the table should be we are counting the
2463      elements have to put in.  Since we are using internal chaining
2464      using a secondary hash function we have to make the table a bit
2465      larger to avoid extremely long search times.  We can achieve
2466      good results with a 40% larger table than there are entries.  */
2467   elem_size = 0;
2468   runp = collate->start;
2469   while (runp != NULL)
2470     {
2471       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2472         /* Yep, the element really counts.  */
2473         ++elem_size;
2474
2475       runp = runp->next;
2476     }
2477   /* Add 40% and find the next prime number.  */
2478   elem_size = next_prime (elem_size * 1.4);
2479
2480   /* Allocate the table.  Each entry consists of two words: the hash
2481      value and an index in a secondary table which provides the index
2482      into the weight table and the string itself (so that a match can
2483      be determined).  */
2484   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2485                                            elem_size * 2 * sizeof (uint32_t));
2486   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2487
2488   /* Now add the elements.  */
2489   runp = collate->start;
2490   while (runp != NULL)
2491     {
2492       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2493         {
2494           /* Compute the hash value of the name.  */
2495           uint32_t namelen = strlen (runp->name);
2496           uint32_t hash = elem_hash (runp->name, namelen);
2497           size_t idx = hash % elem_size;
2498           size_t start_idx = idx;
2499
2500           if (elem_table[idx * 2] != 0)
2501             {
2502               /* The spot is already taken.  Try iterating using the value
2503                  from the secondary hashing function.  */
2504               size_t iter = hash % (elem_size - 2) + 1;
2505
2506               do
2507                 {
2508                   idx += iter;
2509                   if (idx >= elem_size)
2510                     idx -= elem_size;
2511                   assert (idx != start_idx);
2512                 }
2513               while (elem_table[idx * 2] != 0);
2514             }
2515           /* This is the spot where we will insert the value.  */
2516           elem_table[idx * 2] = hash;
2517           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2518
2519           /* The string itself including length.  */
2520           obstack_1grow (&extrapool, namelen);
2521           obstack_grow (&extrapool, runp->name, namelen);
2522
2523           /* And the multibyte representation.  */
2524           obstack_1grow (&extrapool, runp->nmbs);
2525           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2526
2527           /* And align again to 32 bits.  */
2528           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2529             obstack_grow (&extrapool, "\0\0",
2530                           (sizeof (int32_t)
2531                            - ((1 + namelen + 1 + runp->nmbs)
2532                               % sizeof (int32_t))));
2533
2534           /* Now some 32-bit values: multibyte collation sequence,
2535              wide char string (including length), and wide char
2536              collation sequence.  */
2537           obstack_int32_grow (&extrapool, runp->mbseqorder);
2538
2539           obstack_int32_grow (&extrapool, runp->nwcs);
2540           obstack_grow (&extrapool, runp->wcs,
2541                         runp->nwcs * sizeof (uint32_t));
2542
2543           obstack_int32_grow (&extrapool, runp->wcseqorder);
2544         }
2545
2546       runp = runp->next;
2547     }
2548
2549   /* Prepare to write out this data.  */
2550   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2551   iov[2 + cnt].iov_base = &elem_size;
2552   iov[2 + cnt].iov_len = sizeof (int32_t);
2553   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2554   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2555   ++cnt;
2556
2557   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2558   iov[2 + cnt].iov_base = elem_table;
2559   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2560   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2561   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2562   ++cnt;
2563
2564   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2565   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2566   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2567   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2568   ++cnt;
2569
2570   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2571   iov[2 + cnt].iov_base = collate->mbseqorder;
2572   iov[2 + cnt].iov_len = 256;
2573   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2574   ++cnt;
2575
2576   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2577   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2578   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2579   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2580   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2581   ++cnt;
2582
2583   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2584   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2585   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2586   ++cnt;
2587
2588   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2589
2590   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2591
2592   obstack_free (&weightpool, NULL);
2593   obstack_free (&extrapool, NULL);
2594   obstack_free (&indirectpool, NULL);
2595 }
2596
2597
2598 void
2599 collate_read (struct linereader *ldfile, struct localedef_t *result,
2600               const struct charmap_t *charmap, const char *repertoire_name,
2601               int ignore_content)
2602 {
2603   struct repertoire_t *repertoire = NULL;
2604   struct locale_collate_t *collate;
2605   struct token *now;
2606   struct token *arg = NULL;
2607   enum token_t nowtok;
2608   enum token_t was_ellipsis = tok_none;
2609   struct localedef_t *copy_locale = NULL;
2610   /* Parsing state:
2611      0 - start
2612      1 - between `order-start' and `order-end'
2613      2 - after `order-end'
2614      3 - after `reorder-after', waiting for `reorder-end'
2615      4 - after `reorder-end'
2616      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2617      6 - after `reorder-sections-end'
2618   */
2619   int state = 0;
2620
2621   /* Get the repertoire we have to use.  */
2622   if (repertoire_name != NULL)
2623     repertoire = repertoire_read (repertoire_name);
2624
2625   /* The rest of the line containing `LC_COLLATE' must be free.  */
2626   lr_ignore_rest (ldfile, 1);
2627
2628   do
2629     {
2630       now = lr_token (ldfile, charmap, result, NULL, verbose);
2631       nowtok = now->tok;
2632     }
2633   while (nowtok == tok_eol);
2634
2635   if (nowtok == tok_copy)
2636     {
2637       state = 2;
2638       now = lr_token (ldfile, charmap, result, NULL, verbose);
2639       if (now->tok != tok_string)
2640         {
2641           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2642
2643         skip_category:
2644           do
2645             now = lr_token (ldfile, charmap, result, NULL, verbose);
2646           while (now->tok != tok_eof && now->tok != tok_end);
2647
2648           if (now->tok != tok_eof
2649               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2650                   now->tok == tok_eof))
2651             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2652           else if (now->tok != tok_lc_collate)
2653             {
2654               lr_error (ldfile, _("\
2655 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2656               lr_ignore_rest (ldfile, 0);
2657             }
2658           else
2659             lr_ignore_rest (ldfile, 1);
2660
2661           return;
2662         }
2663
2664       if (! ignore_content)
2665         {
2666           /* Get the locale definition.  */
2667           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2668                                      repertoire_name, charmap, NULL);
2669           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2670             {
2671               /* Not yet loaded.  So do it now.  */
2672               if (locfile_read (copy_locale, charmap) != 0)
2673                 goto skip_category;
2674             }
2675
2676           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2677             return;
2678         }
2679
2680       lr_ignore_rest (ldfile, 1);
2681
2682       now = lr_token (ldfile, charmap, result, NULL, verbose);
2683       nowtok = now->tok;
2684     }
2685
2686   /* Prepare the data structures.  */
2687   collate_startup (ldfile, result, copy_locale, ignore_content);
2688   collate = result->categories[LC_COLLATE].collate;
2689
2690   while (1)
2691     {
2692       char ucs4buf[10];
2693       char *symstr;
2694       size_t symlen;
2695
2696       /* Of course we don't proceed beyond the end of file.  */
2697       if (nowtok == tok_eof)
2698         break;
2699
2700       /* Ignore empty lines.  */
2701       if (nowtok == tok_eol)
2702         {
2703           now = lr_token (ldfile, charmap, result, NULL, verbose);
2704           nowtok = now->tok;
2705           continue;
2706         }
2707
2708       switch (nowtok)
2709         {
2710         case tok_copy:
2711           /* Allow copying other locales.  */
2712           now = lr_token (ldfile, charmap, result, NULL, verbose);
2713           if (now->tok != tok_string)
2714             goto err_label;
2715
2716           if (! ignore_content)
2717             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2718                          charmap, result);
2719
2720           lr_ignore_rest (ldfile, 1);
2721           break;
2722
2723         case tok_coll_weight_max:
2724           /* Ignore the rest of the line if we don't need the input of
2725              this line.  */
2726           if (ignore_content)
2727             {
2728               lr_ignore_rest (ldfile, 0);
2729               break;
2730             }
2731
2732           if (state != 0)
2733             goto err_label;
2734
2735           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2736           if (arg->tok != tok_number)
2737             goto err_label;
2738           if (collate->col_weight_max != -1)
2739             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2740                       "LC_COLLATE", "col_weight_max");
2741           else
2742             collate->col_weight_max = arg->val.num;
2743           lr_ignore_rest (ldfile, 1);
2744           break;
2745
2746         case tok_section_symbol:
2747           /* Ignore the rest of the line if we don't need the input of
2748              this line.  */
2749           if (ignore_content)
2750             {
2751               lr_ignore_rest (ldfile, 0);
2752               break;
2753             }
2754
2755           if (state != 0)
2756             goto err_label;
2757
2758           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2759           if (arg->tok != tok_bsymbol)
2760             goto err_label;
2761           else if (!ignore_content)
2762             {
2763               /* Check whether this section is already known.  */
2764               struct section_list *known = collate->sections;
2765               while (known != NULL)
2766                 {
2767                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2768                     break;
2769                   known = known->next;
2770                 }
2771
2772               if (known != NULL)
2773                 {
2774                   lr_error (ldfile,
2775                             _("%s: duplicate declaration of section `%s'"),
2776                             "LC_COLLATE", arg->val.str.startmb);
2777                   free (arg->val.str.startmb);
2778                 }
2779               else
2780                 collate->sections = make_seclist_elem (collate,
2781                                                        arg->val.str.startmb,
2782                                                        collate->sections);
2783
2784               lr_ignore_rest (ldfile, known == NULL);
2785             }
2786           else
2787             {
2788               free (arg->val.str.startmb);
2789               lr_ignore_rest (ldfile, 0);
2790             }
2791           break;
2792
2793         case tok_collating_element:
2794           /* Ignore the rest of the line if we don't need the input of
2795              this line.  */
2796           if (ignore_content)
2797             {
2798               lr_ignore_rest (ldfile, 0);
2799               break;
2800             }
2801
2802           if (state != 0 && state != 2)
2803             goto err_label;
2804
2805           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2806           if (arg->tok != tok_bsymbol)
2807             goto err_label;
2808           else
2809             {
2810               const char *symbol = arg->val.str.startmb;
2811               size_t symbol_len = arg->val.str.lenmb;
2812
2813               /* Next the `from' keyword.  */
2814               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2815               if (arg->tok != tok_from)
2816                 {
2817                   free ((char *) symbol);
2818                   goto err_label;
2819                 }
2820
2821               ldfile->return_widestr = 1;
2822               ldfile->translate_strings = 1;
2823
2824               /* Finally the string with the replacement.  */
2825               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2826
2827               ldfile->return_widestr = 0;
2828               ldfile->translate_strings = 0;
2829
2830               if (arg->tok != tok_string)
2831                 goto err_label;
2832
2833               if (!ignore_content && symbol != NULL)
2834                 {
2835                   /* The name is already defined.  */
2836                   if (check_duplicate (ldfile, collate, charmap,
2837                                        repertoire, symbol, symbol_len))
2838                     goto col_elem_free;
2839
2840                   if (arg->val.str.startmb != NULL)
2841                     insert_entry (&collate->elem_table, symbol, symbol_len,
2842                                   new_element (collate,
2843                                                arg->val.str.startmb,
2844                                                arg->val.str.lenmb - 1,
2845                                                arg->val.str.startwc,
2846                                                symbol, symbol_len, 0));
2847                 }
2848               else
2849                 {
2850                 col_elem_free:
2851                   if (symbol != NULL)
2852                     free ((char *) symbol);
2853                   if (arg->val.str.startmb != NULL)
2854                     free (arg->val.str.startmb);
2855                   if (arg->val.str.startwc != NULL)
2856                     free (arg->val.str.startwc);
2857                 }
2858               lr_ignore_rest (ldfile, 1);
2859             }
2860           break;
2861
2862         case tok_collating_symbol:
2863           /* Ignore the rest of the line if we don't need the input of
2864              this line.  */
2865           if (ignore_content)
2866             {
2867               lr_ignore_rest (ldfile, 0);
2868               break;
2869             }
2870
2871           if (state != 0 && state != 2)
2872             goto err_label;
2873
2874           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2875           if (arg->tok != tok_bsymbol)
2876             goto err_label;
2877           else
2878             {
2879               char *symbol = arg->val.str.startmb;
2880               size_t symbol_len = arg->val.str.lenmb;
2881               char *endsymbol = NULL;
2882               size_t endsymbol_len = 0;
2883               enum token_t ellipsis = tok_none;
2884
2885               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2886               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2887                 {
2888                   ellipsis = arg->tok;
2889
2890                   arg = lr_token (ldfile, charmap, result, repertoire,
2891                                   verbose);
2892                   if (arg->tok != tok_bsymbol)
2893                     {
2894                       free (symbol);
2895                       goto err_label;
2896                     }
2897
2898                   endsymbol = arg->val.str.startmb;
2899                   endsymbol_len = arg->val.str.lenmb;
2900
2901                   lr_ignore_rest (ldfile, 1);
2902                 }
2903               else if (arg->tok != tok_eol)
2904                 {
2905                   free (symbol);
2906                   goto err_label;
2907                 }
2908
2909               if (!ignore_content)
2910                 {
2911                   if (symbol == NULL
2912                       || (ellipsis != tok_none && endsymbol == NULL))
2913                     {
2914                       lr_error (ldfile, _("\
2915 %s: unknown character in collating symbol name"),
2916                                 "LC_COLLATE");
2917                       goto col_sym_free;
2918                     }
2919                   else if (ellipsis == tok_none)
2920                     {
2921                       /* A single symbol, no ellipsis.  */
2922                       if (check_duplicate (ldfile, collate, charmap,
2923                                            repertoire, symbol, symbol_len))
2924                         /* The name is already defined.  */
2925                         goto col_sym_free;
2926
2927                       insert_entry (&collate->sym_table, symbol, symbol_len,
2928                                     new_symbol (collate, symbol, symbol_len));
2929                     }
2930                   else if (symbol_len != endsymbol_len)
2931                     {
2932                     col_sym_inv_range:
2933                       lr_error (ldfile,
2934                                 _("invalid names for character range"));
2935                       goto col_sym_free;
2936                     }
2937                   else
2938                     {
2939                       /* Oh my, we have to handle an ellipsis.  First, as
2940                          usual, determine the common prefix and then
2941                          convert the rest into a range.  */
2942                       size_t prefixlen;
2943                       unsigned long int from;
2944                       unsigned long int to;
2945                       char *endp;
2946
2947                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2948                         if (symbol[prefixlen] != endsymbol[prefixlen])
2949                           break;
2950
2951                       /* Convert the rest into numbers.  */
2952                       symbol[symbol_len] = '\0';
2953                       from = strtoul (&symbol[prefixlen], &endp,
2954                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2955                       if (*endp != '\0')
2956                         goto col_sym_inv_range;
2957
2958                       endsymbol[symbol_len] = '\0';
2959                       to = strtoul (&endsymbol[prefixlen], &endp,
2960                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2961                       if (*endp != '\0')
2962                         goto col_sym_inv_range;
2963
2964                       if (from > to)
2965                         goto col_sym_inv_range;
2966
2967                       /* Now loop over all entries.  */
2968                       while (from <= to)
2969                         {
2970                           char *symbuf;
2971
2972                           symbuf = (char *) obstack_alloc (&collate->mempool,
2973                                                            symbol_len + 1);
2974
2975                           /* Create the name.  */
2976                           sprintf (symbuf,
2977                                    ellipsis == tok_ellipsis2
2978                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2979                                    (int) prefixlen, symbol,
2980                                    (int) (symbol_len - prefixlen), from);
2981
2982                           if (check_duplicate (ldfile, collate, charmap,
2983                                                repertoire, symbuf, symbol_len))
2984                             /* The name is already defined.  */
2985                             goto col_sym_free;
2986
2987                           insert_entry (&collate->sym_table, symbuf,
2988                                         symbol_len,
2989                                         new_symbol (collate, symbuf,
2990                                                     symbol_len));
2991
2992                           /* Increment the counter.  */
2993                           ++from;
2994                         }
2995
2996                       goto col_sym_free;
2997                     }
2998                 }
2999               else
3000                 {
3001                 col_sym_free:
3002                   if (symbol != NULL)
3003                     free (symbol);
3004                   if (endsymbol != NULL)
3005                     free (endsymbol);
3006                 }
3007             }
3008           break;
3009
3010         case tok_symbol_equivalence:
3011           /* Ignore the rest of the line if we don't need the input of
3012              this line.  */
3013           if (ignore_content)
3014             {
3015               lr_ignore_rest (ldfile, 0);
3016               break;
3017             }
3018
3019           if (state != 0)
3020             goto err_label;
3021
3022           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3023           if (arg->tok != tok_bsymbol)
3024             goto err_label;
3025           else
3026             {
3027               const char *newname = arg->val.str.startmb;
3028               size_t newname_len = arg->val.str.lenmb;
3029               const char *symname;
3030               size_t symname_len;
3031               void *symval;     /* Actually struct symbol_t*  */
3032
3033               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3034               if (arg->tok != tok_bsymbol)
3035                 {
3036                   if (newname != NULL)
3037                     free ((char *) newname);
3038                   goto err_label;
3039                 }
3040
3041               symname = arg->val.str.startmb;
3042               symname_len = arg->val.str.lenmb;
3043
3044               if (newname == NULL)
3045                 {
3046                   lr_error (ldfile, _("\
3047 %s: unknown character in equivalent definition name"),
3048                             "LC_COLLATE");
3049
3050                 sym_equiv_free:
3051                   if (newname != NULL)
3052                     free ((char *) newname);
3053                   if (symname != NULL)
3054                     free ((char *) symname);
3055                   break;
3056                 }
3057               if (symname == NULL)
3058                 {
3059                   lr_error (ldfile, _("\
3060 %s: unknown character in equivalent definition value"),
3061                             "LC_COLLATE");
3062                   goto sym_equiv_free;
3063                 }
3064
3065               /* See whether the symbol name is already defined.  */
3066               if (find_entry (&collate->sym_table, symname, symname_len,
3067                               &symval) != 0)
3068                 {
3069                   lr_error (ldfile, _("\
3070 %s: unknown symbol `%s' in equivalent definition"),
3071                             "LC_COLLATE", symname);
3072                   goto sym_equiv_free;
3073                 }
3074
3075               if (insert_entry (&collate->sym_table,
3076                                 newname, newname_len, symval) < 0)
3077                 {
3078                   lr_error (ldfile, _("\
3079 error while adding equivalent collating symbol"));
3080                   goto sym_equiv_free;
3081                 }
3082
3083               free ((char *) symname);
3084             }
3085           lr_ignore_rest (ldfile, 1);
3086           break;
3087
3088         case tok_script:
3089           /* We get told about the scripts we know.  */
3090           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3091           if (arg->tok != tok_bsymbol)
3092             goto err_label;
3093           else
3094             {
3095               struct section_list *runp = collate->known_sections;
3096               char *name;
3097
3098               while (runp != NULL)
3099                 if (strncmp (runp->name, arg->val.str.startmb,
3100                              arg->val.str.lenmb) == 0
3101                     && runp->name[arg->val.str.lenmb] == '\0')
3102                   break;
3103                 else
3104                   runp = runp->def_next;
3105
3106               if (runp != NULL)
3107                 {
3108                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3109                             runp->name);
3110                   lr_ignore_rest (ldfile, 0);
3111                   break;
3112                 }
3113
3114               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3115               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3116               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3117               name[arg->val.str.lenmb] = '\0';
3118               runp->name = name;
3119
3120               runp->def_next = collate->known_sections;
3121               collate->known_sections = runp;
3122             }
3123           lr_ignore_rest (ldfile, 1);
3124           break;
3125
3126         case tok_order_start:
3127           /* Ignore the rest of the line if we don't need the input of
3128              this line.  */
3129           if (ignore_content)
3130             {
3131               lr_ignore_rest (ldfile, 0);
3132               break;
3133             }
3134
3135           if (state != 0 && state != 1 && state != 2)
3136             goto err_label;
3137           state = 1;
3138
3139           /* The 14652 draft does not specify whether all `order_start' lines
3140              must contain the same number of sort-rules, but 14651 does.  So
3141              we require this here as well.  */
3142           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3143           if (arg->tok == tok_bsymbol)
3144             {
3145               /* This better should be a section name.  */
3146               struct section_list *sp = collate->known_sections;
3147               while (sp != NULL
3148                      && (sp->name == NULL
3149                          || strncmp (sp->name, arg->val.str.startmb,
3150                                      arg->val.str.lenmb) != 0
3151                          || sp->name[arg->val.str.lenmb] != '\0'))
3152                 sp = sp->def_next;
3153
3154               if (sp == NULL)
3155                 {
3156                   lr_error (ldfile, _("\
3157 %s: unknown section name `%.*s'"),
3158                             "LC_COLLATE", (int) arg->val.str.lenmb,
3159                             arg->val.str.startmb);
3160                   /* We use the error section.  */
3161                   collate->current_section = &collate->error_section;
3162
3163                   if (collate->error_section.first == NULL)
3164                     {
3165                       /* Insert &collate->error_section at the end of
3166                          the collate->sections list.  */
3167                       if (collate->sections == NULL)
3168                         collate->sections = &collate->error_section;
3169                       else
3170                         {
3171                           sp = collate->sections;
3172                           while (sp->next != NULL)
3173                             sp = sp->next;
3174
3175                           sp->next = &collate->error_section;
3176                         }
3177                       collate->error_section.next = NULL;
3178                     }
3179                 }
3180               else
3181                 {
3182                   /* One should not be allowed to open the same
3183                      section twice.  */
3184                   if (sp->first != NULL)
3185                     lr_error (ldfile, _("\
3186 %s: multiple order definitions for section `%s'"),
3187                               "LC_COLLATE", sp->name);
3188                   else
3189                     {
3190                       /* Insert sp in the collate->sections list,
3191                          right after collate->current_section.  */
3192                       if (collate->current_section == NULL)
3193                         collate->current_section = sp;
3194                       else
3195                         {
3196                           sp->next = collate->current_section->next;
3197                           collate->current_section->next = sp;
3198                         }
3199                     }
3200
3201                   /* Next should come the end of the line or a semicolon.  */
3202                   arg = lr_token (ldfile, charmap, result, repertoire,
3203                                   verbose);
3204                   if (arg->tok == tok_eol)
3205                     {
3206                       uint32_t cnt;
3207
3208                       /* This means we have exactly one rule: `forward'.  */
3209                       if (nrules > 1)
3210                         lr_error (ldfile, _("\
3211 %s: invalid number of sorting rules"),
3212                                   "LC_COLLATE");
3213                       else
3214                         nrules = 1;
3215                       sp->rules = obstack_alloc (&collate->mempool,
3216                                                  (sizeof (enum coll_sort_rule)
3217                                                   * nrules));
3218                       for (cnt = 0; cnt < nrules; ++cnt)
3219                         sp->rules[cnt] = sort_forward;
3220
3221                       /* Next line.  */
3222                       break;
3223                     }
3224
3225                   /* Get the next token.  */
3226                   arg = lr_token (ldfile, charmap, result, repertoire,
3227                                   verbose);
3228                 }
3229             }
3230           else
3231             {
3232               /* There is no section symbol.  Therefore we use the unnamed
3233                  section.  */
3234               collate->current_section = &collate->unnamed_section;
3235
3236               if (collate->unnamed_section.first != NULL)
3237                 lr_error (ldfile, _("\
3238 %s: multiple order definitions for unnamed section"),
3239                           "LC_COLLATE");
3240               else
3241                 {
3242                   /* Insert &collate->unnamed_section at the beginning of
3243                      the collate->sections list.  */
3244                   collate->unnamed_section.next = collate->sections;
3245                   collate->sections = &collate->unnamed_section;
3246                 }
3247             }
3248
3249           /* Now read the direction names.  */
3250           read_directions (ldfile, arg, charmap, repertoire, result);
3251
3252           /* From now we need the strings untranslated.  */
3253           ldfile->translate_strings = 0;
3254           break;
3255
3256         case tok_order_end:
3257           /* Ignore the rest of the line if we don't need the input of
3258              this line.  */
3259           if (ignore_content)
3260             {
3261               lr_ignore_rest (ldfile, 0);
3262               break;
3263             }
3264
3265           if (state != 1)
3266             goto err_label;
3267
3268           /* Handle ellipsis at end of list.  */
3269           if (was_ellipsis != tok_none)
3270             {
3271               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3272                                repertoire, result);
3273               was_ellipsis = tok_none;
3274             }
3275
3276           state = 2;
3277           lr_ignore_rest (ldfile, 1);
3278           break;
3279
3280         case tok_reorder_after:
3281           /* Ignore the rest of the line if we don't need the input of
3282              this line.  */
3283           if (ignore_content)
3284             {
3285               lr_ignore_rest (ldfile, 0);
3286               break;
3287             }
3288
3289           if (state == 1)
3290             {
3291               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3292                         "LC_COLLATE");
3293               state = 2;
3294
3295               /* Handle ellipsis at end of list.  */
3296               if (was_ellipsis != tok_none)
3297                 {
3298                   handle_ellipsis (ldfile, arg->val.str.startmb,
3299                                    arg->val.str.lenmb, was_ellipsis, charmap,
3300                                    repertoire, result);
3301                   was_ellipsis = tok_none;
3302                 }
3303             }
3304           else if (state != 2 && state != 3)
3305             goto err_label;
3306           state = 3;
3307
3308           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3309           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3310             {
3311               /* Find this symbol in the sequence table.  */
3312               char ucsbuf[10];
3313               char *startmb;
3314               size_t lenmb;
3315               struct element_t *insp;
3316               int no_error = 1;
3317               void *ptr;
3318
3319               if (arg->tok == tok_bsymbol)
3320                 {
3321                   startmb = arg->val.str.startmb;
3322                   lenmb = arg->val.str.lenmb;
3323                 }
3324               else
3325                 {
3326                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3327                   startmb = ucsbuf;
3328                   lenmb = 9;
3329                 }
3330
3331               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3332                 /* Yes, the symbol exists.  Simply point the cursor
3333                    to it.  */
3334                 collate->cursor = (struct element_t *) ptr;
3335               else
3336                 {
3337                   struct symbol_t *symbp;
3338                   void *ptr;
3339
3340                   if (find_entry (&collate->sym_table, startmb, lenmb,
3341                                   &ptr) == 0)
3342                     {
3343                       symbp = ptr;
3344
3345                       if (symbp->order->last != NULL
3346                           || symbp->order->next != NULL)
3347                         collate->cursor = symbp->order;
3348                       else
3349                         {
3350                           /* This is a collating symbol but its position
3351                              is not yet defined.  */
3352                           lr_error (ldfile, _("\
3353 %s: order for collating symbol %.*s not yet defined"),
3354                                     "LC_COLLATE", (int) lenmb, startmb);
3355                           collate->cursor = NULL;
3356                           no_error = 0;
3357                         }
3358                     }
3359                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3360                                        &ptr) == 0)
3361                     {
3362                       insp = (struct element_t *) ptr;
3363
3364                       if (insp->last != NULL || insp->next != NULL)
3365                         collate->cursor = insp;
3366                       else
3367                         {
3368                           /* This is a collating element but its position
3369                              is not yet defined.  */
3370                           lr_error (ldfile, _("\
3371 %s: order for collating element %.*s not yet defined"),
3372                                     "LC_COLLATE", (int) lenmb, startmb);
3373                           collate->cursor = NULL;
3374                           no_error = 0;
3375                         }
3376                     }
3377                   else
3378                     {
3379                       /* This is bad.  The symbol after which we have to
3380                          insert does not exist.  */
3381                       lr_error (ldfile, _("\
3382 %s: cannot reorder after %.*s: symbol not known"),
3383                                 "LC_COLLATE", (int) lenmb, startmb);
3384                       collate->cursor = NULL;
3385                       no_error = 0;
3386                     }
3387                 }
3388
3389               lr_ignore_rest (ldfile, no_error);
3390             }
3391           else
3392             /* This must not happen.  */
3393             goto err_label;
3394           break;
3395
3396         case tok_reorder_end:
3397           /* Ignore the rest of the line if we don't need the input of
3398              this line.  */
3399           if (ignore_content)
3400             break;
3401
3402           if (state != 3)
3403             goto err_label;
3404           state = 4;
3405           lr_ignore_rest (ldfile, 1);
3406           break;
3407
3408         case tok_reorder_sections_after:
3409           /* Ignore the rest of the line if we don't need the input of
3410              this line.  */
3411           if (ignore_content)
3412             {
3413               lr_ignore_rest (ldfile, 0);
3414               break;
3415             }
3416
3417           if (state == 1)
3418             {
3419               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3420                         "LC_COLLATE");
3421               state = 2;
3422
3423               /* Handle ellipsis at end of list.  */
3424               if (was_ellipsis != tok_none)
3425                 {
3426                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3427                                    repertoire, result);
3428                   was_ellipsis = tok_none;
3429                 }
3430             }
3431           else if (state == 3)
3432             {
3433               WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3435               state = 4;
3436             }
3437           else if (state != 2 && state != 4)
3438             goto err_label;
3439           state = 5;
3440
3441           /* Get the name of the sections we are adding after.  */
3442           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3443           if (arg->tok == tok_bsymbol)
3444             {
3445               /* Now find a section with this name.  */
3446               struct section_list *runp = collate->sections;
3447
3448               while (runp != NULL)
3449                 {
3450                   if (runp->name != NULL
3451                       && strlen (runp->name) == arg->val.str.lenmb
3452                       && memcmp (runp->name, arg->val.str.startmb,
3453                                  arg->val.str.lenmb) == 0)
3454                     break;
3455
3456                   runp = runp->next;
3457                 }
3458
3459               if (runp != NULL)
3460                 collate->current_section = runp;
3461               else
3462                 {
3463                   /* This is bad.  The section after which we have to
3464                      reorder does not exist.  Therefore we cannot
3465                      process the whole rest of this reorder
3466                      specification.  */
3467                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3468                             "LC_COLLATE", (int) arg->val.str.lenmb,
3469                             arg->val.str.startmb);
3470
3471                   do
3472                     {
3473                       lr_ignore_rest (ldfile, 0);
3474
3475                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3476                     }
3477                   while (now->tok == tok_reorder_sections_after
3478                          || now->tok == tok_reorder_sections_end
3479                          || now->tok == tok_end);
3480
3481                   /* Process the token we just saw.  */
3482                   nowtok = now->tok;
3483                   continue;
3484                 }
3485             }
3486           else
3487             /* This must not happen.  */
3488             goto err_label;
3489           break;
3490
3491         case tok_reorder_sections_end:
3492           /* Ignore the rest of the line if we don't need the input of
3493              this line.  */
3494           if (ignore_content)
3495             break;
3496
3497           if (state != 5)
3498             goto err_label;
3499           state = 6;
3500           lr_ignore_rest (ldfile, 1);
3501           break;
3502
3503         case tok_bsymbol:
3504         case tok_ucs4:
3505           /* Ignore the rest of the line if we don't need the input of
3506              this line.  */
3507           if (ignore_content)
3508             {
3509               lr_ignore_rest (ldfile, 0);
3510               break;
3511             }
3512
3513           if (state != 0 && state != 1 && state != 3 && state != 5)
3514             goto err_label;
3515
3516           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3517             goto err_label;
3518
3519           if (nowtok == tok_ucs4)
3520             {
3521               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3522               symstr = ucs4buf;
3523               symlen = 9;
3524             }
3525           else if (arg != NULL)
3526             {
3527               symstr = arg->val.str.startmb;
3528               symlen = arg->val.str.lenmb;
3529             }
3530           else
3531             {
3532               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3533                         (int) ldfile->token.val.str.lenmb,
3534                         ldfile->token.val.str.startmb);
3535               break;
3536             }
3537
3538           struct element_t *seqp;
3539           if (state == 0)
3540             {
3541               /* We are outside an `order_start' region.  This means
3542                  we must only accept definitions of values for
3543                  collation symbols since these are purely abstract
3544                  values and don't need directions associated.  */
3545               void *ptr;
3546
3547               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3548                 {
3549                   seqp = ptr;
3550
3551                   /* It's already defined.  First check whether this
3552                      is really a collating symbol.  */
3553                   if (seqp->is_character)
3554                     goto err_label;
3555
3556                   goto move_entry;
3557                 }
3558               else
3559                 {
3560                   void *result;
3561
3562                   if (find_entry (&collate->sym_table, symstr, symlen,
3563                                   &result) != 0)
3564                     /* No collating symbol, it's an error.  */
3565                     goto err_label;
3566
3567                   /* Maybe this is the first time we define a symbol
3568                      value and it is before the first actual section.  */
3569                   if (collate->sections == NULL)
3570                     collate->sections = collate->current_section =
3571                       &collate->symbol_section;
3572                 }
3573
3574               if (was_ellipsis != tok_none)
3575                 {
3576                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3577                                    charmap, repertoire, result);
3578
3579                   /* Remember that we processed the ellipsis.  */
3580                   was_ellipsis = tok_none;
3581
3582                   /* And don't add the value a second time.  */
3583                   break;
3584                 }
3585             }
3586           else if (state == 3)
3587             {
3588               /* It is possible that we already have this collation sequence.
3589                  In this case we move the entry.  */
3590               void *sym;
3591               void *ptr;
3592
3593               /* If the symbol after which we have to insert was not found
3594                  ignore all entries.  */
3595               if (collate->cursor == NULL)
3596                 {
3597                   lr_ignore_rest (ldfile, 0);
3598                   break;
3599                 }
3600
3601               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3602                 {
3603                   seqp = (struct element_t *) ptr;
3604                   goto move_entry;
3605                 }
3606
3607               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3608                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3609                 goto move_entry;
3610
3611               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3612                   && (seqp = (struct element_t *) ptr,
3613                       seqp->last != NULL || seqp->next != NULL
3614                       || (collate->start != NULL && seqp == collate->start)))
3615                 {
3616                 move_entry:
3617                   /* Remove the entry from the old position.  */
3618                   if (seqp->last == NULL)
3619                     collate->start = seqp->next;
3620                   else
3621                     seqp->last->next = seqp->next;
3622                   if (seqp->next != NULL)
3623                     seqp->next->last = seqp->last;
3624
3625                   /* We also have to check whether this entry is the
3626                      first or last of a section.  */
3627                   if (seqp->section->first == seqp)
3628                     {
3629                       if (seqp->section->first == seqp->section->last)
3630                         /* This section has no content anymore.  */
3631                         seqp->section->first = seqp->section->last = NULL;
3632                       else
3633                         seqp->section->first = seqp->next;
3634                     }
3635                   else if (seqp->section->last == seqp)
3636                     seqp->section->last = seqp->last;
3637
3638                   /* Now insert it in the new place.  */
3639                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3640                                   tok_none);
3641                   break;
3642                 }
3643
3644               /* Otherwise we just add a new entry.  */
3645             }
3646           else if (state == 5)
3647             {
3648               /* We are reordering sections.  Find the named section.  */
3649               struct section_list *runp = collate->sections;
3650               struct section_list *prevp = NULL;
3651
3652               while (runp != NULL)
3653                 {
3654                   if (runp->name != NULL
3655                       && strlen (runp->name) == symlen
3656                       && memcmp (runp->name, symstr, symlen) == 0)
3657                     break;
3658
3659                   prevp = runp;
3660                   runp = runp->next;
3661                 }
3662
3663               if (runp == NULL)
3664                 {
3665                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3666                             "LC_COLLATE", (int) symlen, symstr);
3667                   lr_ignore_rest (ldfile, 0);
3668                 }
3669               else
3670                 {
3671                   if (runp != collate->current_section)
3672                     {
3673                       /* Remove the named section from the old place and
3674                          insert it in the new one.  */
3675                       prevp->next = runp->next;
3676
3677                       runp->next = collate->current_section->next;
3678                       collate->current_section->next = runp;
3679                       collate->current_section = runp;
3680                     }
3681
3682                   /* Process the rest of the line which might change
3683                      the collation rules.  */
3684                   arg = lr_token (ldfile, charmap, result, repertoire,
3685                                   verbose);
3686                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3687                     read_directions (ldfile, arg, charmap, repertoire,
3688                                      result);
3689                 }
3690               break;
3691             }
3692           else if (was_ellipsis != tok_none)
3693             {
3694               /* Using the information in the `ellipsis_weight'
3695                  element and this and the last value we have to handle
3696                  the ellipsis now.  */
3697               assert (state == 1);
3698
3699               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3700                                repertoire, result);
3701
3702               /* Remember that we processed the ellipsis.  */
3703               was_ellipsis = tok_none;
3704
3705               /* And don't add the value a second time.  */
3706               break;
3707             }
3708
3709           /* Now insert in the new place.  */
3710           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3711           break;
3712
3713         case tok_undefined:
3714           /* Ignore the rest of the line if we don't need the input of
3715              this line.  */
3716           if (ignore_content)
3717             {
3718               lr_ignore_rest (ldfile, 0);
3719               break;
3720             }
3721
3722           if (state != 1)
3723             goto err_label;
3724
3725           if (was_ellipsis != tok_none)
3726             {
3727               lr_error (ldfile,
3728                         _("%s: cannot have `%s' as end of ellipsis range"),
3729                         "LC_COLLATE", "UNDEFINED");
3730
3731               unlink_element (collate);
3732               was_ellipsis = tok_none;
3733             }
3734
3735           /* See whether UNDEFINED already appeared somewhere.  */
3736           if (collate->undefined.next != NULL
3737               || &collate->undefined == collate->cursor)
3738             {
3739               lr_error (ldfile,
3740                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3741                         "LC_COLLATE", 9, "UNDEFINED",
3742                         collate->undefined.file,
3743                         collate->undefined.line);
3744               lr_ignore_rest (ldfile, 0);
3745             }
3746           else
3747             /* Parse the weights.  */
3748              insert_weights (ldfile, &collate->undefined, charmap,
3749                              repertoire, result, tok_none);
3750           break;
3751
3752         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3753         case tok_ellipsis3: /* absolute ellipsis */
3754         case tok_ellipsis4: /* symbolic decimal ellipsis */
3755           /* This is the symbolic (decimal or hexadecimal) or absolute
3756              ellipsis.  */
3757           if (was_ellipsis != tok_none)
3758             goto err_label;
3759
3760           if (state != 0 && state != 1 && state != 3)
3761             goto err_label;
3762
3763           was_ellipsis = nowtok;
3764
3765           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3766                           repertoire, result, nowtok);
3767           break;
3768
3769         case tok_end:
3770           /* Next we assume `LC_COLLATE'.  */
3771           if (!ignore_content)
3772             {
3773               if (state == 0)
3774                 /* We must either see a copy statement or have
3775                    ordering values.  */
3776                 lr_error (ldfile,
3777                           _("%s: empty category description not allowed"),
3778                           "LC_COLLATE");
3779               else if (state == 1)
3780                 {
3781                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3782                             "LC_COLLATE");
3783
3784                   /* Handle ellipsis at end of list.  */
3785                   if (was_ellipsis != tok_none)
3786                     {
3787                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3788                                        repertoire, result);
3789                       was_ellipsis = tok_none;
3790                     }
3791                 }
3792               else if (state == 3)
3793                 WITH_CUR_LOCALE (error (0, 0, _("\
3794 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3795               else if (state == 5)
3796                 WITH_CUR_LOCALE (error (0, 0, _("\
3797 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3798             }
3799           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3800           if (arg->tok == tok_eof)
3801             break;
3802           if (arg->tok == tok_eol)
3803             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3804           else if (arg->tok != tok_lc_collate)
3805             lr_error (ldfile, _("\
3806 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3807           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3808           return;
3809
3810         default:
3811         err_label:
3812           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3813         }
3814
3815       /* Prepare for the next round.  */
3816       now = lr_token (ldfile, charmap, result, NULL, verbose);
3817       nowtok = now->tok;
3818     }
3819
3820   /* When we come here we reached the end of the file.  */
3821   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3822 }