locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <errno.h>
  25 #include <error.h>
  26 #include <stdlib.h>
  27 #include <wchar.h>
  28 #include <sys/param.h>
  29
  30 #include "localedef.h"
  31 #include "charmap.h"
  32 #include "localeinfo.h"
  33 #include "linereader.h"
  34 #include "locfile.h"
  35 #include "elem-hash.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
/* Allocation hooks for the obstack macros: chunks are obtained with
   malloc and given back with free.  */
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
  43
  44 static inline void
  45 __attribute ((always_inline))
  46 obstack_int32_grow (struct obstack *obstack, int32_t data)
  47 {
  48   if (sizeof (int32_t) == sizeof (int))
  49     obstack_int_grow (obstack, data);
  50   else
  51     obstack_grow (obstack, &data, sizeof (int32_t));
  52 }
  53
  54 static inline void
  55 __attribute ((always_inline))
  56 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  57 {
  58   if (sizeof (int32_t) == sizeof (int))
  59     obstack_int_grow_fast (obstack, data);
  60   else
  61     obstack_grow (obstack, &data, sizeof (int32_t));
  62 }
  63
  64 /* Forward declaration.  */
  65 struct element_t;
  66
  67 /* Data type for list of strings.  */
  68 struct section_list
  69 {
  70   /* Successor in the known_sections list.  */
  71   struct section_list *def_next;
  72   /* Successor in the sections list.  */
  73   struct section_list *next;
  74   /* Name of the section.  */
  75   const char *name;
  76   /* First element of this section.  */
  77   struct element_t *first;
  78   /* Last element of this section.  */
  79   struct element_t *last;
  80   /* These are the rules for this section.  */
  81   enum coll_sort_rule *rules;
  82   /* Index of the rule set in the appropriate section of the output file.  */
  83   int ruleidx;
  84 };
  85
struct element_t;

/* A counted array of element pointers.  Each element owns an array of
   these (its `weights'), indexed by the weight level.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  An entry may be NULL or one of the
     ELEMENT_ELLIPSIS* markers instead of a real element.  */
  struct element_t **w;
};
  95
  96 /* Data type for collating element.  */
  97 struct element_t
  98 {
  99   const char *name;
 100
 101   const char *mbs;
 102   size_t nmbs;
 103   const uint32_t *wcs;
 104   size_t nwcs;
 105   int *mborder;
 106   int wcorder;
 107
 108   /* The following is a bit mask which bits are set if this element is
 109      used in the appropriate level.  Interesting for the singlebyte
 110      weight computation.
 111
 112      XXX The type here restricts the number of levels to 32.  It could
 113      be changed if necessary but I doubt this is necessary.  */
 114   unsigned int used_in_level;
 115
 116   struct element_list_t *weights;
 117
 118   /* Nonzero if this is a real character definition.  */
 119   int is_character;
 120
 121   /* Order of the character in the sequence.  This information will
 122      be used in range expressions.  */
 123   int mbseqorder;
 124   int wcseqorder;
 125
 126   /* Where does the definition come from.  */
 127   const char *file;
 128   size_t line;
 129
 130   /* Which section does this belong to.  */
 131   struct section_list *section;
 132
 133   /* Predecessor and successor in the order list.  */
 134   struct element_t *last;
 135   struct element_t *next;
 136
 137   /* Next element in multibyte output list.  */
 138   struct element_t *mbnext;
 139   struct element_t *mblast;
 140
 141   /* Next element in wide character output list.  */
 142   struct element_t *wcnext;
 143   struct element_t *wclast;
 144 };
 145
/* Special element values.  These are small integers cast to element
   pointers; they are stored in a weight slot in place of a real
   element.  insert_weights uses ELEMENT_ELLIPSIS2 as the placeholder
   for a weight which depends on the element an ellipsis range is
   iterating over.  */
#define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 150
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL until an element has been
     created for the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
 163
/* Sparse table of struct element_t *.  The macros are set up before
   each inclusion of "3level.h", which is included once per table type.
   NOTE(review): presumably 3level.h reads TABLE, ELEMENT and DEFAULT
   and undefines them again, since they are redefined below -- confirm
   against that header.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_FINALIZE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  The default value is all bits set.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
 183
 184
 185 /* The real definition of the struct for the LC_COLLATE locale.  */
 186 struct locale_collate_t
 187 {
 188   int col_weight_max;
 189   int cur_weight_max;
 190
 191   /* List of known scripts.  */
 192   struct section_list *known_sections;
 193   /* List of used sections.  */
 194   struct section_list *sections;
 195   /* Current section using definition.  */
 196   struct section_list *current_section;
 197   /* There always can be an unnamed section.  */
 198   struct section_list unnamed_section;
 199   /* To make handling of errors easier we have another section.  */
 200   struct section_list error_section;
 201   /* Sometimes we are defining the values for collating symbols before
 202      the first actual section.  */
 203   struct section_list symbol_section;
 204
 205   /* Start of the order list.  */
 206   struct element_t *start;
 207
 208   /* The undefined element.  */
 209   struct element_t undefined;
 210
 211   /* This is the cursor for `reorder_after' insertions.  */
 212   struct element_t *cursor;
 213
 214   /* This value is used when handling ellipsis.  */
 215   struct element_t ellipsis_weight;
 216
 217   /* Known collating elements.  */
 218   hash_table elem_table;
 219
 220   /* Known collating symbols.  */
 221   hash_table sym_table;
 222
 223   /* Known collation sequences.  */
 224   hash_table seq_table;
 225
 226   struct obstack mempool;
 227
 228   /* The LC_COLLATE category is a bit special as it is sometimes possible
 229      that the definitions from more than one input file contains information.
 230      Therefore we keep all relevant input in a list.  */
 231   struct locale_collate_t *next;
 232
 233   /* Arrays with heads of the list for each of the leading bytes in
 234      the multibyte sequences.  */
 235   struct element_t *mbheads[256];
 236
 237   /* Arrays with heads of the list for each of the leading bytes in
 238      the multibyte sequences.  */
 239   struct wchead_table wcheads;
 240
 241   /* The arrays with the collation sequence order.  */
 242   unsigned char mbseqorder[256];
 243   struct collseq_table wcseqorder;
 244 };
 245
 246
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.

   NRULES is the number of sorting rules (weight levels).  It is zero
   until read_directions has parsed the first direction list; later
   lists are checked against it and insert_weights allocates this many
   weight slots per element.  */
static uint32_t nrules;
 250
 251
 252 /* We need UTF-8 encoding of numbers.  */
 253 static inline int
 254 __attribute ((always_inline))
 255 utf8_encode (char *buf, int val)
 256 {
 257   int retval;
 258
 259   if (val < 0x80)
 260     {
 261       *buf++ = (char) val;
 262       retval = 1;
 263     }
 264   else
 265     {
 266       int step;
 267
 268       for (step = 2; step < 6; ++step)
 269         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 270           break;
 271       retval = step;
 272
 273       *buf = (unsigned char) (~0xff >> step);
 274       --step;
 275       do
 276         {
 277           buf[step] = 0x80 | (val & 0x3f);
 278           val >>= 6;
 279         }
 280       while (--step > 0);
 281       *buf |= val;
 282     }
 283
 284   return retval;
 285 }
 286
 287
 288 static struct section_list *
 289 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 290                    struct section_list *next)
 291 {
 292   struct section_list *newp;
 293
 294   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 295                                                 sizeof (*newp));
 296   newp->next = next;
 297   newp->name = string;
 298   newp->first = NULL;
 299   newp->last = NULL;
 300
 301   return newp;
 302 }
 303
 304
 305 static struct element_t *
 306 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 307              const uint32_t *wcs, const char *name, size_t namelen,
 308              int is_character)
 309 {
 310   struct element_t *newp;
 311
 312   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 313                                              sizeof (*newp));
 314   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 315                                                     name, namelen);
 316   if (mbs != NULL)
 317     {
 318       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 319       newp->nmbs = mbslen;
 320     }
 321   else
 322     {
 323       newp->mbs = NULL;
 324       newp->nmbs = 0;
 325     }
 326   if (wcs != NULL)
 327     {
 328       size_t nwcs = wcslen ((wchar_t *) wcs);
 329       uint32_t zero = 0;
 330       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 331       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 332       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 333       newp->nwcs = nwcs;
 334     }
 335   else
 336     {
 337       newp->wcs = NULL;
 338       newp->nwcs = 0;
 339     }
 340   newp->mborder = NULL;
 341   newp->wcorder = 0;
 342   newp->used_in_level = 0;
 343   newp->is_character = is_character;
 344
 345   /* Will be assigned later.  XXX  */
 346   newp->mbseqorder = 0;
 347   newp->wcseqorder = 0;
 348
 349   /* Will be allocated later.  */
 350   newp->weights = NULL;
 351
 352   newp->file = NULL;
 353   newp->line = 0;
 354
 355   newp->section = collate->current_section;
 356
 357   newp->last = NULL;
 358   newp->next = NULL;
 359
 360   newp->mbnext = NULL;
 361   newp->mblast = NULL;
 362
 363   newp->wcnext = NULL;
 364   newp->wclast = NULL;
 365
 366   return newp;
 367 }
 368
 369
 370 static struct symbol_t *
 371 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 372 {
 373   struct symbol_t *newp;
 374
 375   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 376
 377   newp->name = obstack_copy0 (&collate->mempool, name, len);
 378   newp->order = NULL;
 379
 380   newp->file = NULL;
 381   newp->line = 0;
 382
 383   return newp;
 384 }
 385
 386
 387 /* Test whether this name is already defined somewhere.  */
 388 static int
 389 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 390                  const struct charmap_t *charmap,
 391                  struct repertoire_t *repertoire, const char *symbol,
 392                  size_t symbol_len)
 393 {
 394   void *ignore = NULL;
 395
 396   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 397     {
 398       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 399                 (int) symbol_len, symbol);
 400       return 1;
 401     }
 402
 403   if (repertoire != NULL
 404       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 405           == 0))
 406     {
 407       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 408                 (int) symbol_len, symbol);
 409       return 1;
 410     }
 411
 412   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 413     {
 414       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 415                 (int) symbol_len, symbol);
 416       return 1;
 417     }
 418
 419   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 420     {
 421       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 422                 (int) symbol_len, symbol);
 423       return 1;
 424     }
 425
 426   return 0;
 427 }
 428
 429
 430 /* Read the direction specification.  */
 431 static void
 432 read_directions (struct linereader *ldfile, struct token *arg,
 433                  const struct charmap_t *charmap,
 434                  struct repertoire_t *repertoire, struct localedef_t *result)
 435 {
 436   int cnt = 0;
 437   int max = nrules ?: 10;
 438   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 439   int warned = 0;
 440   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 441
 442   while (1)
 443     {
 444       int valid = 0;
 445
 446       if (arg->tok == tok_forward)
 447         {
 448           if (rules[cnt] & sort_backward)
 449             {
 450               if (! warned)
 451                 {
 452                   lr_error (ldfile, _("\
 453 %s: `forward' and `backward' are mutually excluding each other"),
 454                             "LC_COLLATE");
 455                   warned = 1;
 456                 }
 457             }
 458           else if (rules[cnt] & sort_forward)
 459             {
 460               if (! warned)
 461                 {
 462                   lr_error (ldfile, _("\
 463 %s: `%s' mentioned more than once in definition of weight %d"),
 464                             "LC_COLLATE", "forward", cnt + 1);
 465                 }
 466             }
 467           else
 468             rules[cnt] |= sort_forward;
 469
 470           valid = 1;
 471         }
 472       else if (arg->tok == tok_backward)
 473         {
 474           if (rules[cnt] & sort_forward)
 475             {
 476               if (! warned)
 477                 {
 478                   lr_error (ldfile, _("\
 479 %s: `forward' and `backward' are mutually excluding each other"),
 480                             "LC_COLLATE");
 481                   warned = 1;
 482                 }
 483             }
 484           else if (rules[cnt] & sort_backward)
 485             {
 486               if (! warned)
 487                 {
 488                   lr_error (ldfile, _("\
 489 %s: `%s' mentioned more than once in definition of weight %d"),
 490                             "LC_COLLATE", "backward", cnt + 1);
 491                 }
 492             }
 493           else
 494             rules[cnt] |= sort_backward;
 495
 496           valid = 1;
 497         }
 498       else if (arg->tok == tok_position)
 499         {
 500           if (rules[cnt] & sort_position)
 501             {
 502               if (! warned)
 503                 {
 504                   lr_error (ldfile, _("\
 505 %s: `%s' mentioned more than once in definition of weight %d"),
 506                             "LC_COLLATE", "position", cnt + 1);
 507                 }
 508             }
 509           else
 510             rules[cnt] |= sort_position;
 511
 512           valid = 1;
 513         }
 514
 515       if (valid)
 516         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 517
 518       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 519           || arg->tok == tok_semicolon)
 520         {
 521           if (! valid && ! warned)
 522             {
 523               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 524               warned = 1;
 525             }
 526
 527           /* See whether we have to increment the counter.  */
 528           if (arg->tok != tok_comma && rules[cnt] != 0)
 529             {
 530               /* Add the default `forward' if we have seen only `position'.  */
 531               if (rules[cnt] == sort_position)
 532                 rules[cnt] = sort_position | sort_forward;
 533
 534               ++cnt;
 535             }
 536
 537           if (arg->tok == tok_eof || arg->tok == tok_eol)
 538             /* End of line or file, so we exit the loop.  */
 539             break;
 540
 541           if (nrules == 0)
 542             {
 543               /* See whether we have enough room in the array.  */
 544               if (cnt == max)
 545                 {
 546                   max += 10;
 547                   rules = (enum coll_sort_rule *) xrealloc (rules,
 548                                                             max
 549                                                             * sizeof (*rules));
 550                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 551                 }
 552             }
 553           else
 554             {
 555               if (cnt == nrules)
 556                 {
 557                   /* There must not be any more rule.  */
 558                   if (! warned)
 559                     {
 560                       lr_error (ldfile, _("\
 561 %s: too many rules; first entry only had %d"),
 562                                 "LC_COLLATE", nrules);
 563                       warned = 1;
 564                     }
 565
 566                   lr_ignore_rest (ldfile, 0);
 567                   break;
 568                 }
 569             }
 570         }
 571       else
 572         {
 573           if (! warned)
 574             {
 575               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 576               warned = 1;
 577             }
 578         }
 579
 580       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 581     }
 582
 583   if (nrules == 0)
 584     {
 585       /* Now we know how many rules we have.  */
 586       nrules = cnt;
 587       rules = (enum coll_sort_rule *) xrealloc (rules,
 588                                                 nrules * sizeof (*rules));
 589     }
 590   else
 591     {
 592       if (cnt < nrules)
 593         {
 594           /* Not enough rules in this specification.  */
 595           if (! warned)
 596             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 597
 598           do
 599             rules[cnt] = sort_forward;
 600           while (++cnt < nrules);
 601         }
 602     }
 603
 604   collate->current_section->rules = rules;
 605 }
 606
 607
 608 static struct element_t *
 609 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 610               const char *str, size_t len)
 611 {
 612   void *result = NULL;
 613
 614   /* Search for the entries among the collation sequences already define.  */
 615   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 616     {
 617       /* Nope, not define yet.  So we see whether it is a
 618          collation symbol.  */
 619       void *ptr;
 620
 621       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 622         {
 623           /* It's a collation symbol.  */
 624           struct symbol_t *sym = (struct symbol_t *) ptr;
 625           result = sym->order;
 626
 627           if (result == NULL)
 628             result = sym->order = new_element (collate, NULL, 0, NULL,
 629                                                NULL, 0, 0);
 630         }
 631       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 632         {
 633           /* It's also no collation element.  So it is a character
 634              element defined later.  */
 635           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 636           /* Insert it into the sequence table.  */
 637           insert_entry (&collate->seq_table, str, len, result);
 638         }
 639     }
 640
 641   return (struct element_t *) result;
 642 }
 643
 644
 645 static void
 646 unlink_element (struct locale_collate_t *collate)
 647 {
 648   if (collate->cursor == collate->start)
 649     {
 650       assert (collate->cursor->next == NULL);
 651       assert (collate->cursor->last == NULL);
 652       collate->cursor = NULL;
 653     }
 654   else
 655     {
 656       if (collate->cursor->next != NULL)
 657         collate->cursor->next->last = collate->cursor->last;
 658       if (collate->cursor->last != NULL)
 659         collate->cursor->last->next = collate->cursor->next;
 660       collate->cursor = collate->cursor->last;
 661     }
 662 }
 663
 664
 665 static void
 666 insert_weights (struct linereader *ldfile, struct element_t *elem,
 667                 const struct charmap_t *charmap,
 668                 struct repertoire_t *repertoire, struct localedef_t *result,
 669                 enum token_t ellipsis)
 670 {
 671   int weight_cnt;
 672   struct token *arg;
 673   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 674
 675   /* Initialize all the fields.  */
 676   elem->file = ldfile->fname;
 677   elem->line = ldfile->lineno;
 678
 679   elem->last = collate->cursor;
 680   elem->next = collate->cursor ? collate->cursor->next : NULL;
 681   if (collate->cursor != NULL && collate->cursor->next != NULL)
 682     collate->cursor->next->last = elem;
 683   if (collate->cursor != NULL)
 684     collate->cursor->next = elem;
 685   if (collate->start == NULL)
 686     {
 687       assert (collate->cursor == NULL);
 688       collate->start = elem;
 689     }
 690
 691   elem->section = collate->current_section;
 692
 693   if (collate->current_section->first == NULL)
 694     collate->current_section->first = elem;
 695   if (collate->current_section->last == collate->cursor)
 696     collate->current_section->last = elem;
 697
 698   collate->cursor = elem;
 699
 700   elem->weights = (struct element_list_t *)
 701     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 702   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 703
 704   weight_cnt = 0;
 705
 706   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 707   do
 708     {
 709       if (arg->tok == tok_eof || arg->tok == tok_eol)
 710         break;
 711
 712       if (arg->tok == tok_ignore)
 713         {
 714           /* The weight for this level has to be ignored.  We use the
 715              null pointer to indicate this.  */
 716           elem->weights[weight_cnt].w = (struct element_t **)
 717             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 718           elem->weights[weight_cnt].w[0] = NULL;
 719           elem->weights[weight_cnt].cnt = 1;
 720         }
 721       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 722         {
 723           char ucs4str[10];
 724           struct element_t *val;
 725           char *symstr;
 726           size_t symlen;
 727
 728           if (arg->tok == tok_bsymbol)
 729             {
 730               symstr = arg->val.str.startmb;
 731               symlen = arg->val.str.lenmb;
 732             }
 733           else
 734             {
 735               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 736               symstr = ucs4str;
 737               symlen = 9;
 738             }
 739
 740           val = find_element (ldfile, collate, symstr, symlen);
 741           if (val == NULL)
 742             break;
 743
 744           elem->weights[weight_cnt].w = (struct element_t **)
 745             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 746           elem->weights[weight_cnt].w[0] = val;
 747           elem->weights[weight_cnt].cnt = 1;
 748         }
 749       else if (arg->tok == tok_string)
 750         {
 751           /* Split the string up in the individual characters and put
 752              the element definitions in the list.  */
 753           const char *cp = arg->val.str.startmb;
 754           int cnt = 0;
 755           struct element_t *charelem;
 756           struct element_t **weights = NULL;
 757           int max = 0;
 758
 759           if (*cp == '\0')
 760             {
 761               lr_error (ldfile, _("%s: empty weight string not allowed"),
 762                         "LC_COLLATE");
 763               lr_ignore_rest (ldfile, 0);
 764               break;
 765             }
 766
 767           do
 768             {
 769               if (*cp == '<')
 770                 {
 771                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 772                      the latter we have to unify the name.  */
 773                   const char *startp = ++cp;
 774                   size_t len;
 775
 776                   while (*cp != '>')
 777                     {
 778                       if (*cp == ldfile->escape_char)
 779                         ++cp;
 780                       if (*cp == '\0')
 781                         /* It's a syntax error.  */
 782                         goto syntax;
 783
 784                       ++cp;
 785                     }
 786
 787                   if (cp - startp == 5 && startp[0] == 'U'
 788                       && isxdigit (startp[1]) && isxdigit (startp[2])
 789                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 790                     {
 791                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 792                       char *newstr;
 793
 794                       newstr = (char *) xmalloc (10);
 795                       snprintf (newstr, 10, "U%08X", ucs4);
 796                       startp = newstr;
 797
 798                       len = 9;
 799                     }
 800                   else
 801                     len = cp - startp;
 802
 803                   charelem = find_element (ldfile, collate, startp, len);
 804                   ++cp;
 805                 }
 806               else
 807                 {
 808                   /* People really shouldn't use characters directly in
 809                      the string.  Especially since it's not really clear
 810                      what this means.  We interpret all characters in the
 811                      string as if that would be bsymbols.  Otherwise we
 812                      would have to match back to bsymbols somehow and this
 813                      is normally not what people normally expect.  */
 814                   charelem = find_element (ldfile, collate, cp++, 1);
 815                 }
 816
 817               if (charelem == NULL)
 818                 {
 819                   /* We ignore the rest of the line.  */
 820                   lr_ignore_rest (ldfile, 0);
 821                   break;
 822                 }
 823
 824               /* Add the pointer.  */
 825               if (cnt >= max)
 826                 {
 827                   struct element_t **newp;
 828                   max += 10;
 829                   newp = (struct element_t **)
 830                     alloca (max * sizeof (struct element_t *));
 831                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 832                   weights = newp;
 833                 }
 834               weights[cnt++] = charelem;
 835             }
 836           while (*cp != '\0');
 837
 838           /* Now store the information.  */
 839           elem->weights[weight_cnt].w = (struct element_t **)
 840             obstack_alloc (&collate->mempool,
 841                            cnt * sizeof (struct element_t *));
 842           memcpy (elem->weights[weight_cnt].w, weights,
 843                   cnt * sizeof (struct element_t *));
 844           elem->weights[weight_cnt].cnt = cnt;
 845
 846           /* We don't need the string anymore.  */
 847           free (arg->val.str.startmb);
 848         }
 849       else if (ellipsis != tok_none
 850                && (arg->tok == tok_ellipsis2
 851                    || arg->tok == tok_ellipsis3
 852                    || arg->tok == tok_ellipsis4))
 853         {
 854           /* It must be the same ellipsis as used in the initial column.  */
 855           if (arg->tok != ellipsis)
 856             lr_error (ldfile, _("\
 857 %s: weights must use the same ellipsis symbol as the name"),
 858                       "LC_COLLATE");
 859
 860           /* The weight for this level will depend on the element
 861              iterating over the range.  Put a placeholder.  */
 862           elem->weights[weight_cnt].w = (struct element_t **)
 863             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 864           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 865           elem->weights[weight_cnt].cnt = 1;
 866         }
 867       else
 868         {
 869         syntax:
 870           /* It's a syntax error.  */
 871           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 872           lr_ignore_rest (ldfile, 0);
 873           break;
 874         }
 875
 876       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 877       /* This better should be the end of the line or a semicolon.  */
 878       if (arg->tok == tok_semicolon)
 879         /* OK, ignore this and read the next token.  */
 880         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 881       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 882         {
 883           /* It's a syntax error.  */
 884           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 885           lr_ignore_rest (ldfile, 0);
 886           break;
 887         }
 888     }
 889   while (++weight_cnt < nrules);
 890
 891   if (weight_cnt < nrules)
 892     {
 893       /* This means the rest of the line uses the current element as
 894          the weight.  */
 895       do
 896         {
 897           elem->weights[weight_cnt].w = (struct element_t **)
 898             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 899           if (ellipsis == tok_none)
 900             elem->weights[weight_cnt].w[0] = elem;
 901           else
 902             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 903           elem->weights[weight_cnt].cnt = 1;
 904         }
 905       while (++weight_cnt < nrules);
 906     }
 907   else
 908     {
 909       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 910         {
 911           /* Too many rule values.  */
 912           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 913           lr_ignore_rest (ldfile, 0);
 914         }
 915       else
 916         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 917     }
 918 }
 919
 920
 921 static int
 922 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 923               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 924               struct localedef_t *result)
 925 {
 926   /* First find out what kind of symbol this is.  */
 927   struct charseq *seq;
 928   uint32_t wc;
 929   struct element_t *elem = NULL;
 930   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 931
 932   /* Try to find the character in the charmap.  */
 933   seq = charmap_find_value (charmap, symstr, symlen);
 934
 935   /* Determine the wide character.  */
 936   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 937     {
 938       wc = repertoire_find_value (repertoire, symstr, symlen);
 939       if (seq != NULL)
 940         seq->ucs4 = wc;
 941     }
 942   else
 943     wc = seq->ucs4;
 944
 945   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 946     {
 947       /* It's no character, so look through the collation elements and
 948          symbol list.  */
 949       void *ptr = elem;
 950       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 951         {
 952           void *result;
 953           struct symbol_t *sym = NULL;
 954
 955           /* It's also collation element.  Therefore it's either a
 956              collating symbol or it's a character which is not
 957              supported by the character set.  In the later case we
 958              simply create a dummy entry.  */
 959           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 960             {
 961               /* It's a collation symbol.  */
 962               sym = (struct symbol_t *) result;
 963
 964               elem = sym->order;
 965             }
 966
 967           if (elem == NULL)
 968             {
 969               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 970
 971               if (sym != NULL)
 972                 sym->order = elem;
 973               else
 974                 /* Enter a fake element in the sequence table.  This
 975                    won't cause anything in the output since there is
 976                    no multibyte or wide character associated with
 977                    it.  */
 978                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 979             }
 980         }
 981       else
 982         /* Copy the result back.  */
 983         elem = ptr;
 984     }
 985   else
 986     {
 987       /* Otherwise the symbols stands for a character.  */
 988       void *ptr = elem;
 989       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
 990         {
 991           uint32_t wcs[2] = { wc, 0 };
 992
 993           /* We have to allocate an entry.  */
 994           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 995                               seq != NULL ? seq->nbytes : 0,
 996                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 997                               symstr, symlen, 1);
 998
 999           /* And add it to the table.  */
1000           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1001             /* This cannot happen.  */
1002             assert (! "Internal error");
1003         }
1004       else
1005         {
1006           /* Copy the result back.  */
1007           elem = ptr;
1008
1009           /* Maybe the character was used before the definition.  In this case
1010              we have to insert the byte sequences now.  */
1011           if (elem->mbs == NULL && seq != NULL)
1012             {
1013               elem->mbs = obstack_copy0 (&collate->mempool,
1014                                          seq->bytes, seq->nbytes);
1015               elem->nmbs = seq->nbytes;
1016             }
1017
1018           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1019             {
1020               uint32_t wcs[2] = { wc, 0 };
1021
1022               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1023               elem->nwcs = 1;
1024             }
1025         }
1026     }
1027
1028   /* Test whether this element is not already in the list.  */
1029   if (elem->next != NULL || elem == collate->cursor)
1030     {
1031       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1032                 (int) symlen, symstr, elem->file, elem->line);
1033       lr_ignore_rest (ldfile, 0);
1034       return 1;
1035     }
1036
1037   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1038
1039   return 0;
1040 }
1041
1042
1043 static void
1044 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1045                  enum token_t ellipsis, const struct charmap_t *charmap,
1046                  struct repertoire_t *repertoire,
1047                  struct localedef_t *result)
1048 {
1049   struct element_t *startp;
1050   struct element_t *endp;
1051   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1052
1053   /* Unlink the entry added for the ellipsis.  */
1054   unlink_element (collate);
1055   startp = collate->cursor;
1056
1057   /* Process and add the end-entry.  */
1058   if (symstr != NULL
1059       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1060     /* Something went wrong with inserting the to-value.  This means
1061        we cannot process the ellipsis.  */
1062     return;
1063
1064   /* Reset the cursor.  */
1065   collate->cursor = startp;
1066
1067   /* Now we have to handle many different situations:
1068      - we have to distinguish between the three different ellipsis forms
1069      - the is the ellipsis at the beginning, in the middle, or at the end.
1070   */
1071   endp = collate->cursor->next;
1072   assert (symstr == NULL || endp != NULL);
1073
1074   /* XXX The following is probably very wrong since also collating symbols
1075      can appear in ranges.  But do we want/can refine the test for that?  */
1076 #if 0
1077   /* Both, the start and the end symbol, must stand for characters.  */
1078   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1079       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1080     {
1081       lr_error (ldfile, _("\
1082 %s: the start and the end symbol of a range must stand for characters"),
1083                 "LC_COLLATE");
1084       return;
1085     }
1086 #endif
1087
1088   if (ellipsis == tok_ellipsis3)
1089     {
1090       /* One requirement we make here: the length of the byte
1091          sequences for the first and end character must be the same.
1092          This is mainly to prevent unwanted effects and this is often
1093          not what is wanted.  */
1094       size_t len = (startp->mbs != NULL ? startp->nmbs
1095                     : (endp->mbs != NULL ? endp->nmbs : 0));
1096       char mbcnt[len + 1];
1097       char mbend[len + 1];
1098
1099       /* Well, this should be caught somewhere else already.  Just to
1100          make sure.  */
1101       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1102       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1103
1104       if (startp != NULL && endp != NULL
1105           && startp->mbs != NULL && endp->mbs != NULL
1106           && startp->nmbs != endp->nmbs)
1107         {
1108           lr_error (ldfile, _("\
1109 %s: byte sequences of first and last character must have the same length"),
1110                     "LC_COLLATE");
1111           return;
1112         }
1113
1114       /* Determine whether we have to generate multibyte sequences.  */
1115       if ((startp == NULL || startp->mbs != NULL)
1116           && (endp == NULL || endp->mbs != NULL))
1117         {
1118           int cnt;
1119           int ret;
1120
1121           /* Prepare the beginning byte sequence.  This is either from the
1122              beginning byte sequence or it is all nulls if it was an
1123              initial ellipsis.  */
1124           if (startp == NULL || startp->mbs == NULL)
1125             memset (mbcnt, '\0', len);
1126           else
1127             {
1128               memcpy (mbcnt, startp->mbs, len);
1129
1130               /* And increment it so that the value is the first one we will
1131                  try to insert.  */
1132               for (cnt = len - 1; cnt >= 0; --cnt)
1133                 if (++mbcnt[cnt] != '\0')
1134                   break;
1135             }
1136           mbcnt[len] = '\0';
1137
1138           /* And the end sequence.  */
1139           if (endp == NULL || endp->mbs == NULL)
1140             memset (mbend, '\0', len);
1141           else
1142             memcpy (mbend, endp->mbs, len);
1143           mbend[len] = '\0';
1144
1145           /* Test whether we have a correct range.  */
1146           ret = memcmp (mbcnt, mbend, len);
1147           if (ret >= 0)
1148             {
1149               if (ret > 0)
1150                 lr_error (ldfile, _("%s: byte sequence of first character of \
1151 sequence is not lower than that of the last character"), "LC_COLLATE");
1152               return;
1153             }
1154
1155           /* Generate the byte sequences data.  */
1156           while (1)
1157             {
1158               struct charseq *seq;
1159
1160               /* Quite a bit of work ahead.  We have to find the character
1161                  definition for the byte sequence and then determine the
1162                  wide character belonging to it.  */
1163               seq = charmap_find_symbol (charmap, mbcnt, len);
1164               if (seq != NULL)
1165                 {
1166                   struct element_t *elem;
1167                   size_t namelen;
1168
1169                   /* I don't this this can ever happen.  */
1170                   assert (seq->name != NULL);
1171                   namelen = strlen (seq->name);
1172
1173                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1174                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1175                                                        namelen);
1176
1177                   /* Now we are ready to insert the new value in the
1178                      sequence.  Find out whether the element is
1179                      already known.  */
1180                   void *ptr;
1181                   if (find_entry (&collate->seq_table, seq->name, namelen,
1182                                   &ptr) != 0)
1183                     {
1184                       uint32_t wcs[2] = { seq->ucs4, 0 };
1185
1186                       /* We have to allocate an entry.  */
1187                       elem = new_element (collate, mbcnt, len,
1188                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1189                                           ? NULL : wcs, seq->name,
1190                                           namelen, 1);
1191
1192                       /* And add it to the table.  */
1193                       if (insert_entry (&collate->seq_table, seq->name,
1194                                         namelen, elem) != 0)
1195                         /* This cannot happen.  */
1196                         assert (! "Internal error");
1197                     }
1198                   else
1199                     /* Copy the result.  */
1200                     elem = ptr;
1201
1202                   /* Test whether this element is not already in the list.  */
1203                   if (elem->next != NULL || (collate->cursor != NULL
1204                                              && elem->next == collate->cursor))
1205                     {
1206                       lr_error (ldfile, _("\
1207 order for `%.*s' already defined at %s:%Zu"),
1208                                 (int) namelen, seq->name,
1209                                 elem->file, elem->line);
1210                       goto increment;
1211                     }
1212
1213                   /* Enqueue the new element.  */
1214                   elem->last = collate->cursor;
1215                   if (collate->cursor == NULL)
1216                     elem->next = NULL;
1217                   else
1218                     {
1219                       elem->next = collate->cursor->next;
1220                       elem->last->next = elem;
1221                       if (elem->next != NULL)
1222                         elem->next->last = elem;
1223                     }
1224                   if (collate->start == NULL)
1225                     {
1226                       assert (collate->cursor == NULL);
1227                       collate->start = elem;
1228                     }
1229                   collate->cursor = elem;
1230
1231                  /* Add the weight value.  We take them from the
1232                     `ellipsis_weights' member of `collate'.  */
1233                   elem->weights = (struct element_list_t *)
1234                     obstack_alloc (&collate->mempool,
1235                                    nrules * sizeof (struct element_list_t));
1236                   for (cnt = 0; cnt < nrules; ++cnt)
1237                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1238                         && (collate->ellipsis_weight.weights[cnt].w[0]
1239                             == ELEMENT_ELLIPSIS2))
1240                       {
1241                         elem->weights[cnt].w = (struct element_t **)
1242                           obstack_alloc (&collate->mempool,
1243                                          sizeof (struct element_t *));
1244                         elem->weights[cnt].w[0] = elem;
1245                         elem->weights[cnt].cnt = 1;
1246                       }
1247                     else
1248                       {
1249                         /* Simply use the weight from `ellipsis_weight'.  */
1250                         elem->weights[cnt].w =
1251                           collate->ellipsis_weight.weights[cnt].w;
1252                         elem->weights[cnt].cnt =
1253                           collate->ellipsis_weight.weights[cnt].cnt;
1254                       }
1255                 }
1256
1257               /* Increment for the next round.  */
1258             increment:
1259               for (cnt = len - 1; cnt >= 0; --cnt)
1260                 if (++mbcnt[cnt] != '\0')
1261                   break;
1262
1263               /* Find out whether this was all.  */
1264               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1265                 /* Yep, that's all.  */
1266                 break;
1267             }
1268         }
1269     }
1270   else
1271     {
1272       /* For symbolic range we naturally must have a beginning and an
1273          end specified by the user.  */
1274       if (startp == NULL)
1275         lr_error (ldfile, _("\
1276 %s: symbolic range ellipsis must not directly follow `order_start'"),
1277                   "LC_COLLATE");
1278       else if (endp == NULL)
1279         lr_error (ldfile, _("\
1280 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1281                   "LC_COLLATE");
1282       else
1283         {
1284           /* Determine the range.  To do so we have to determine the
1285              common prefix of the both names and then the numeric
1286              values of both ends.  */
1287           size_t lenfrom = strlen (startp->name);
1288           size_t lento = strlen (endp->name);
1289           char buf[lento + 1];
1290           int preflen = 0;
1291           long int from;
1292           long int to;
1293           char *cp;
1294           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1295
1296           if (lenfrom != lento)
1297             {
1298             invalid_range:
1299               lr_error (ldfile, _("\
1300 `%s' and `%.*s' are no valid names for symbolic range"),
1301                         startp->name, (int) lento, endp->name);
1302               return;
1303             }
1304
1305           while (startp->name[preflen] == endp->name[preflen])
1306             if (startp->name[preflen] == '\0')
1307               /* Nothing to be done.  The start and end point are identical
1308                  and while inserting the end point we have already given
1309                  the user an error message.  */
1310               return;
1311             else
1312               ++preflen;
1313
1314           errno = 0;
1315           from = strtol (startp->name + preflen, &cp, base);
1316           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1317             goto invalid_range;
1318
1319           errno = 0;
1320           to = strtol (endp->name + preflen, &cp, base);
1321           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1322             goto invalid_range;
1323
1324           /* Copy the prefix.  */
1325           memcpy (buf, startp->name, preflen);
1326
1327           /* Loop over all values.  */
1328           for (++from; from < to; ++from)
1329             {
1330               struct element_t *elem = NULL;
1331               struct charseq *seq;
1332               uint32_t wc;
1333               int cnt;
1334
1335               /* Generate the the name.  */
1336               sprintf (buf + preflen, base == 10 ? "%ld" : "%lX", from);
1337
1338               /* Look whether this name is already defined.  */
1339               void *ptr;
1340               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1341                 {
1342                   /* Copy back the result.  */
1343                   elem = ptr;
1344
1345                   if (elem->next != NULL || (collate->cursor != NULL
1346                                              && elem->next == collate->cursor))
1347                     {
1348                       lr_error (ldfile, _("\
1349 %s: order for `%.*s' already defined at %s:%Zu"),
1350                                 "LC_COLLATE", (int) lenfrom, buf,
1351                                 elem->file, elem->line);
1352                       continue;
1353                     }
1354
1355                   if (elem->name == NULL)
1356                     {
1357                       lr_error (ldfile, _("%s: `%s' must be a character"),
1358                                 "LC_COLLATE", buf);
1359                       continue;
1360                     }
1361                 }
1362
1363               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1364                 {
1365                   /* Search for a character of this name.  */
1366                   seq = charmap_find_value (charmap, buf, lenfrom);
1367                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1368                     {
1369                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1370
1371                       if (seq != NULL)
1372                         seq->ucs4 = wc;
1373                     }
1374                   else
1375                     wc = seq->ucs4;
1376
1377                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1378                     /* We don't know anything about a character with this
1379                        name.  XXX Should we warn?  */
1380                     continue;
1381
1382                   if (elem == NULL)
1383                     {
1384                       uint32_t wcs[2] = { wc, 0 };
1385
1386                       /* We have to allocate an entry.  */
1387                       elem = new_element (collate,
1388                                           seq != NULL ? seq->bytes : NULL,
1389                                           seq != NULL ? seq->nbytes : 0,
1390                                           wc == ILLEGAL_CHAR_VALUE
1391                                           ? NULL : wcs, buf, lenfrom, 1);
1392                     }
1393                   else
1394                     {
1395                       /* Update the element.  */
1396                       if (seq != NULL)
1397                         {
1398                           elem->mbs = obstack_copy0 (&collate->mempool,
1399                                                      seq->bytes, seq->nbytes);
1400                           elem->nmbs = seq->nbytes;
1401                         }
1402
1403                       if (wc != ILLEGAL_CHAR_VALUE)
1404                         {
1405                           uint32_t zero = 0;
1406
1407                           obstack_grow (&collate->mempool,
1408                                         &wc, sizeof (uint32_t));
1409                           obstack_grow (&collate->mempool,
1410                                         &zero, sizeof (uint32_t));
1411                           elem->wcs = obstack_finish (&collate->mempool);
1412                           elem->nwcs = 1;
1413                         }
1414                     }
1415
1416                   elem->file = ldfile->fname;
1417                   elem->line = ldfile->lineno;
1418                   elem->section = collate->current_section;
1419                 }
1420
1421               /* Enqueue the new element.  */
1422               elem->last = collate->cursor;
1423               elem->next = collate->cursor->next;
1424               elem->last->next = elem;
1425               if (elem->next != NULL)
1426                 elem->next->last = elem;
1427               collate->cursor = elem;
1428
1429               /* Now add the weights.  They come from the `ellipsis_weights'
1430                  member of `collate'.  */
1431               elem->weights = (struct element_list_t *)
1432                 obstack_alloc (&collate->mempool,
1433                                nrules * sizeof (struct element_list_t));
1434               for (cnt = 0; cnt < nrules; ++cnt)
1435                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1436                     && (collate->ellipsis_weight.weights[cnt].w[0]
1437                         == ELEMENT_ELLIPSIS2))
1438                   {
1439                     elem->weights[cnt].w = (struct element_t **)
1440                       obstack_alloc (&collate->mempool,
1441                                      sizeof (struct element_t *));
1442                     elem->weights[cnt].w[0] = elem;
1443                     elem->weights[cnt].cnt = 1;
1444                   }
1445                 else
1446                   {
1447                     /* Simly use the weight from `ellipsis_weight'.  */
1448                     elem->weights[cnt].w =
1449                       collate->ellipsis_weight.weights[cnt].w;
1450                     elem->weights[cnt].cnt =
1451                       collate->ellipsis_weight.weights[cnt].cnt;
1452                   }
1453             }
1454         }
1455     }
1456 }
1457
1458
1459 static void
1460 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1461                  struct localedef_t *copy_locale, int ignore_content)
1462 {
1463   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1464     {
1465       struct locale_collate_t *collate;
1466
1467       if (copy_locale == NULL)
1468         {
1469           collate = locale->categories[LC_COLLATE].collate =
1470             (struct locale_collate_t *)
1471             xcalloc (1, sizeof (struct locale_collate_t));
1472
1473           /* Init the various data structures.  */
1474           init_hash (&collate->elem_table, 100);
1475           init_hash (&collate->sym_table, 100);
1476           init_hash (&collate->seq_table, 500);
1477           obstack_init (&collate->mempool);
1478
1479           collate->col_weight_max = -1;
1480         }
1481       else
1482         /* Reuse the copy_locale's data structures.  */
1483         collate = locale->categories[LC_COLLATE].collate =
1484           copy_locale->categories[LC_COLLATE].collate;
1485     }
1486
1487   ldfile->translate_strings = 0;
1488   ldfile->return_widestr = 0;
1489 }
1490
1491
1492 void
1493 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1494 {
1495   /* Now is the time when we can assign the individual collation
1496      values for all the symbols.  We have possibly different values
1497      for the wide- and the multibyte-character symbols.  This is done
1498      since it might make a difference in the encoding if there is in
1499      some cases no multibyte-character but there are wide-characters.
1500      (The other way around it is not important since theencoded
1501      collation value in the wide-character case is 32 bits wide and
1502      therefore requires no encoding).
1503
1504      The lowest collation value assigned is 2.  Zero is reserved for
1505      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1506      functions and 1 is used to separate the individual passes for the
1507      different rules.
1508
1509      We also have to construct is list with all the bytes/words which
1510      can come first in a sequence, followed by all the elements which
1511      also start with this byte/word.  The order is reverse which has
1512      among others the important effect that longer strings are located
1513      first in the list.  This is required for the output data since
1514      the algorithm used in `strcoll' etc depends on this.
1515
1516      The multibyte case is easy.  We simply sort into an array with
1517      256 elements.  */
1518   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1519   int mbact[nrules];
1520   int wcact;
1521   int mbseqact;
1522   int wcseqact;
1523   struct element_t *runp;
1524   int i;
1525   int need_undefined = 0;
1526   struct section_list *sect;
1527   int ruleidx;
1528   int nr_wide_elems = 0;
1529
1530   if (collate == NULL)
1531     {
1532       /* No data, no check.  */
1533       if (! be_quiet)
1534         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1535                                 "LC_COLLATE"));
1536       return;
1537     }
1538
1539   /* If this assertion is hit change the type in `element_t'.  */
1540   assert (nrules <= sizeof (runp->used_in_level) * 8);
1541
1542   /* Make sure that the `position' rule is used either in all sections
1543      or in none.  */
1544   for (i = 0; i < nrules; ++i)
1545     for (sect = collate->sections; sect != NULL; sect = sect->next)
1546       if (sect->rules != NULL
1547           && ((sect->rules[i] & sort_position)
1548               != (collate->sections->rules[i] & sort_position)))
1549         {
1550           WITH_CUR_LOCALE (error (0, 0, _("\
1551 %s: `position' must be used for a specific level in all sections or none"),
1552                                   "LC_COLLATE"));
1553           break;
1554         }
1555
1556   /* Find out which elements are used at which level.  At the same
1557      time we find out whether we have any undefined symbols.  */
1558   runp = collate->start;
1559   while (runp != NULL)
1560     {
1561       if (runp->mbs != NULL)
1562         {
1563           for (i = 0; i < nrules; ++i)
1564             {
1565               int j;
1566
1567               for (j = 0; j < runp->weights[i].cnt; ++j)
1568                 /* A NULL pointer as the weight means IGNORE.  */
1569                 if (runp->weights[i].w[j] != NULL)
1570                   {
1571                     if (runp->weights[i].w[j]->weights == NULL)
1572                       {
1573                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1574                                                         runp->line,
1575                                                         _("symbol `%s' not defined"),
1576                                                         runp->weights[i].w[j]->name));
1577
1578                         need_undefined = 1;
1579                         runp->weights[i].w[j] = &collate->undefined;
1580                       }
1581                     else
1582                       /* Set the bit for the level.  */
1583                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1584                   }
1585             }
1586         }
1587
1588       /* Up to the next entry.  */
1589       runp = runp->next;
1590     }
1591
1592   /* Walk through the list of defined sequences and assign weights.  Also
1593      create the data structure which will allow generating the single byte
1594      character based tables.
1595
1596      Since at each time only the weights for each of the rules are
1597      only compared to other weights for this rule it is possible to
1598      assign more compact weight values than simply counting all
1599      weights in sequence.  We can assign weights from 3, one for each
1600      rule individually and only for those elements, which are actually
1601      used for this rule.
1602
1603      Why is this important?  It is not for the wide char table.  But
1604      it is for the singlebyte output since here larger numbers have to
1605      be encoded to make it possible to emit the value as a byte
1606      string.  */
1607   for (i = 0; i < nrules; ++i)
1608     mbact[i] = 2;
1609   wcact = 2;
1610   mbseqact = 0;
1611   wcseqact = 0;
1612   runp = collate->start;
1613   while (runp != NULL)
1614     {
1615       /* Determine the order.  */
1616       if (runp->used_in_level != 0)
1617         {
1618           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1619                                                  nrules * sizeof (int));
1620
1621           for (i = 0; i < nrules; ++i)
1622             if ((runp->used_in_level & (1 << i)) != 0)
1623               runp->mborder[i] = mbact[i]++;
1624             else
1625               runp->mborder[i] = 0;
1626         }
1627
1628       if (runp->mbs != NULL)
1629         {
1630           struct element_t **eptr;
1631           struct element_t *lastp = NULL;
1632
1633           /* Find the point where to insert in the list.  */
1634           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1635           while (*eptr != NULL)
1636             {
1637               if ((*eptr)->nmbs < runp->nmbs)
1638                 break;
1639
1640               if ((*eptr)->nmbs == runp->nmbs)
1641                 {
1642                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1643
1644                   if (c == 0)
1645                     {
1646                       /* This should not happen.  It means that we have
1647                          to symbols with the same byte sequence.  It is
1648                          of course an error.  */
1649                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1650                                                       (*eptr)->line,
1651                                                       _("\
1652 symbol `%s' has the same encoding as"), (*eptr)->name);
1653                                        error_at_line (0, 0, runp->file,
1654                                                       runp->line,
1655                                                       _("symbol `%s'"),
1656                                                       runp->name));
1657                       goto dont_insert;
1658                     }
1659                   else if (c < 0)
1660                     /* Insert it here.  */
1661                     break;
1662                 }
1663
1664               /* To the next entry.  */
1665               lastp = *eptr;
1666               eptr = &(*eptr)->mbnext;
1667             }
1668
1669           /* Set the pointers.  */
1670           runp->mbnext = *eptr;
1671           runp->mblast = lastp;
1672           if (*eptr != NULL)
1673             (*eptr)->mblast = runp;
1674           *eptr = runp;
1675         dont_insert:
1676           ;
1677         }
1678
1679       if (runp->used_in_level)
1680         {
1681           runp->wcorder = wcact++;
1682
1683           /* We take the opportunity to count the elements which have
1684              wide characters.  */
1685           ++nr_wide_elems;
1686         }
1687
1688       if (runp->is_character)
1689         {
1690           if (runp->nmbs == 1)
1691             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1692
1693           runp->wcseqorder = wcseqact++;
1694         }
1695       else if (runp->mbs != NULL && runp->weights != NULL)
1696         /* This is for collation elements.  */
1697         runp->wcseqorder = wcseqact++;
1698
1699       /* Up to the next entry.  */
1700       runp = runp->next;
1701     }
1702
1703   /* Find out whether any of the `mbheads' entries is unset.  In this
1704      case we use the UNDEFINED entry.  */
1705   for (i = 1; i < 256; ++i)
1706     if (collate->mbheads[i] == NULL)
1707       {
1708         need_undefined = 1;
1709         collate->mbheads[i] = &collate->undefined;
1710       }
1711
1712   /* Now to the wide character case.  */
1713   collate->wcheads.p = 6;
1714   collate->wcheads.q = 10;
1715   wchead_table_init (&collate->wcheads);
1716
1717   collate->wcseqorder.p = 6;
1718   collate->wcseqorder.q = 10;
1719   collseq_table_init (&collate->wcseqorder);
1720
1721   /* Start adding.  */
1722   runp = collate->start;
1723   while (runp != NULL)
1724     {
1725       if (runp->wcs != NULL)
1726         {
1727           struct element_t *e;
1728           struct element_t **eptr;
1729           struct element_t *lastp;
1730
1731           /* Insert the collation sequence value.  */
1732           if (runp->is_character)
1733             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1734                                runp->wcseqorder);
1735
1736           /* Find the point where to insert in the list.  */
1737           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1738           eptr = &e;
1739           lastp = NULL;
1740           while (*eptr != NULL)
1741             {
1742               if ((*eptr)->nwcs < runp->nwcs)
1743                 break;
1744
1745               if ((*eptr)->nwcs == runp->nwcs)
1746                 {
1747                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1748                                    (wchar_t *) runp->wcs, runp->nwcs);
1749
1750                   if (c == 0)
1751                     {
1752                       /* This should not happen.  It means that we have
1753                          two symbols with the same byte sequence.  It is
1754                          of course an error.  */
1755                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1756                                                       (*eptr)->line,
1757                                                       _("\
1758 symbol `%s' has the same encoding as"), (*eptr)->name);
1759                                        error_at_line (0, 0, runp->file,
1760                                                       runp->line,
1761                                                       _("symbol `%s'"),
1762                                                       runp->name));
1763                       goto dont_insertwc;
1764                     }
1765                   else if (c < 0)
1766                     /* Insert it here.  */
1767                     break;
1768                 }
1769
1770               /* To the next entry.  */
1771               lastp = *eptr;
1772               eptr = &(*eptr)->wcnext;
1773             }
1774
1775           /* Set the pointers.  */
1776           runp->wcnext = *eptr;
1777           runp->wclast = lastp;
1778           if (*eptr != NULL)
1779             (*eptr)->wclast = runp;
1780           *eptr = runp;
1781           if (eptr == &e)
1782             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1783         dont_insertwc:
1784           ;
1785         }
1786
1787       /* Up to the next entry.  */
1788       runp = runp->next;
1789     }
1790
1791   collseq_table_finalize (&collate->wcseqorder);
1792
1793   /* Now determine whether the UNDEFINED entry is needed and if yes,
1794      whether it was defined.  */
1795   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1796   if (collate->undefined.file == NULL)
1797     {
1798       if (need_undefined)
1799         {
1800           /* This seems not to be enforced by recent standards.  Don't
1801              emit an error, simply append UNDEFINED at the end.  */
1802           if (0)
1803             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1804
1805           /* Add UNDEFINED at the end.  */
1806           collate->undefined.mborder =
1807             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1808
1809           for (i = 0; i < nrules; ++i)
1810             collate->undefined.mborder[i] = mbact[i]++;
1811         }
1812
1813       /* In any case we will need the definition for the wide character
1814          case.  But we will not complain that it is missing since the
1815          specification strangely enough does not seem to account for
1816          this.  */
1817       collate->undefined.wcorder = wcact++;
1818     }
1819
1820   /* Finally, try to unify the rules for the sections.  Whenever the rules
1821      for a section are the same as those for another section give the
1822      ruleset the same index.  Since there are never many section we can
1823      use an O(n^2) algorithm here.  */
1824   sect = collate->sections;
1825   while (sect != NULL && sect->rules == NULL)
1826     sect = sect->next;
1827
1828   /* Bail out if we have no sections because of earlier errors.  */
1829   if (sect == NULL)
1830     {
1831       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1832                               _("too many errors; giving up")));
1833       return;
1834     }
1835
1836   ruleidx = 0;
1837   do
1838     {
1839       struct section_list *osect = collate->sections;
1840
1841       while (osect != sect)
1842         if (osect->rules != NULL
1843             && memcmp (osect->rules, sect->rules, nrules) == 0)
1844           break;
1845         else
1846           osect = osect->next;
1847
1848       if (osect == sect)
1849         sect->ruleidx = ruleidx++;
1850       else
1851         sect->ruleidx = osect->ruleidx;
1852
1853       /* Next section.  */
1854       do
1855         sect = sect->next;
1856       while (sect != NULL && sect->rules == NULL);
1857     }
1858   while (sect != NULL);
1859   /* We are currently not prepared for more than 128 rulesets.  But this
1860      should never really be a problem.  */
1861   assert (ruleidx <= 128);
1862 }
1863
1864
1865 static int32_t
1866 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1867                struct element_t *elem)
1868 {
1869   size_t cnt;
1870   int32_t retval;
1871
1872   /* Optimize the use of UNDEFINED.  */
1873   if (elem == &collate->undefined)
1874     /* The weights are already inserted.  */
1875     return 0;
1876
1877   /* This byte can start exactly one collation element and this is
1878      a single byte.  We can directly give the index to the weights.  */
1879   retval = obstack_object_size (pool);
1880
1881   /* Construct the weight.  */
1882   for (cnt = 0; cnt < nrules; ++cnt)
1883     {
1884       char buf[elem->weights[cnt].cnt * 7];
1885       int len = 0;
1886       int i;
1887
1888       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1889         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1890         if (elem->weights[cnt].w[i] != NULL)
1891           len += utf8_encode (&buf[len],
1892                               elem->weights[cnt].w[i]->mborder[cnt]);
1893
1894       /* And add the buffer content.  */
1895       obstack_1grow (pool, len);
1896       obstack_grow (pool, buf, len);
1897     }
1898
1899   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1900 }
1901
1902
1903 static int32_t
1904 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1905                  struct element_t *elem)
1906 {
1907   size_t cnt;
1908   int32_t retval;
1909
1910   /* Optimize the use of UNDEFINED.  */
1911   if (elem == &collate->undefined)
1912     /* The weights are already inserted.  */
1913     return 0;
1914
1915   /* This byte can start exactly one collation element and this is
1916      a single byte.  We can directly give the index to the weights.  */
1917   retval = obstack_object_size (pool) / sizeof (int32_t);
1918
1919   /* Construct the weight.  */
1920   for (cnt = 0; cnt < nrules; ++cnt)
1921     {
1922       int32_t buf[elem->weights[cnt].cnt];
1923       int i;
1924       int32_t j;
1925
1926       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1927         if (elem->weights[cnt].w[i] != NULL)
1928           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1929
1930       /* And add the buffer content.  */
1931       obstack_int32_grow (pool, j);
1932
1933       obstack_grow (pool, buf, j * sizeof (int32_t));
1934     }
1935
1936   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1937 }
1938
1939
1940 void
1941 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
1942                 const char *output_path)
1943 {
1944   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1945   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1946   struct iovec iov[2 + nelems];
1947   struct locale_file data;
1948   uint32_t idx[nelems];
1949   size_t cnt;
1950   size_t ch;
1951   int32_t tablemb[256];
1952   struct obstack weightpool;
1953   struct obstack extrapool;
1954   struct obstack indirectpool;
1955   struct section_list *sect;
1956   struct collidx_table tablewc;
1957   uint32_t elem_size;
1958   uint32_t *elem_table;
1959   int i;
1960   struct element_t *runp;
1961
1962   data.magic = LIMAGIC (LC_COLLATE);
1963   data.n = nelems;
1964   iov[0].iov_base = (void *) &data;
1965   iov[0].iov_len = sizeof (data);
1966
1967   iov[1].iov_base = (void *) idx;
1968   iov[1].iov_len = sizeof (idx);
1969
1970   idx[0] = iov[0].iov_len + iov[1].iov_len;
1971   cnt = 0;
1972
1973   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1974   iov[2 + cnt].iov_base = &nrules;
1975   iov[2 + cnt].iov_len = sizeof (uint32_t);
1976   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1977   ++cnt;
1978
1979   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1980   if (collate == NULL)
1981     {
1982       int32_t dummy = 0;
1983
1984       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1985         {
1986           /* The words have to be handled specially.  */
1987           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1988             {
1989               iov[2 + cnt].iov_base = &dummy;
1990               iov[2 + cnt].iov_len = sizeof (int32_t);
1991             }
1992           else
1993             {
1994               iov[2 + cnt].iov_base = NULL;
1995               iov[2 + cnt].iov_len = 0;
1996             }
1997
1998           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1999             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2000           ++cnt;
2001         }
2002
2003       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2004
2005       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2006
2007       return;
2008     }
2009
2010   obstack_init (&weightpool);
2011   obstack_init (&extrapool);
2012   obstack_init (&indirectpool);
2013
2014   /* Since we are using the sign of an integer to mark indirection the
2015      offsets in the arrays we are indirectly referring to must not be
2016      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2017   obstack_int32_grow (&extrapool, 0);
2018   obstack_int32_grow (&indirectpool, 0);
2019
2020   /* Prepare the ruleset table.  */
2021   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2022     if (sect->rules != NULL && sect->ruleidx == i)
2023       {
2024         int j;
2025
2026         obstack_make_room (&weightpool, nrules);
2027
2028         for (j = 0; j < nrules; ++j)
2029           obstack_1grow_fast (&weightpool, sect->rules[j]);
2030         ++i;
2031       }
2032   /* And align the output.  */
2033   i = (nrules * i) % __alignof__ (int32_t);
2034   if (i > 0)
2035     do
2036       obstack_1grow (&weightpool, '\0');
2037     while (++i < __alignof__ (int32_t));
2038
2039   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2040   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2041   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2042   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2043   ++cnt;
2044
2045   /* Generate the 8-bit table.  Walk through the lists of sequences
2046      starting with the same byte and add them one after the other to
2047      the table.  In case we have more than one sequence starting with
2048      the same byte we have to use extra indirection.
2049
2050      First add a record for the NUL byte.  This entry will never be used
2051      so it does not matter.  */
2052   tablemb[0] = 0;
2053
2054   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2055      will probably be used more than once it is good to store the
2056      weights only once.  */
2057   if (collate->undefined.used_in_level != 0)
2058     output_weight (&weightpool, collate, &collate->undefined);
2059
2060   for (ch = 1; ch < 256; ++ch)
2061     if (collate->mbheads[ch]->mbnext == NULL
2062         && collate->mbheads[ch]->nmbs <= 1)
2063       {
2064         tablemb[ch] = output_weight (&weightpool, collate,
2065                                      collate->mbheads[ch]);
2066       }
2067     else
2068       {
2069         /* The entries in the list are sorted by length and then
2070            alphabetically.  This is the order in which we will add the
2071            elements to the collation table.  This allows simply walking
2072            the table in sequence and stopping at the first matching
2073            entry.  Since the longer sequences are coming first in the
2074            list they have the possibility to match first, just as it
2075            has to be.  In the worst case we are walking to the end of
2076            the list where we put, if no singlebyte sequence is defined
2077            in the locale definition, the weights for UNDEFINED.
2078
2079            To reduce the length of the search list we compress them a bit.
2080            This happens by collecting sequences of consecutive byte
2081            sequences in one entry (having and begin and end byte sequence)
2082            and add only one index into the weight table.  We can find the
2083            consecutive entries since they are also consecutive in the list.  */
2084         struct element_t *runp = collate->mbheads[ch];
2085         struct element_t *lastp;
2086
2087         assert ((obstack_object_size (&extrapool)
2088                  & (__alignof__ (int32_t) - 1)) == 0);
2089
2090         tablemb[ch] = -obstack_object_size (&extrapool);
2091
2092         do
2093           {
2094             /* Store the current index in the weight table.  We know that
2095                the current position in the `extrapool' is aligned on a
2096                32-bit address.  */
2097             int32_t weightidx;
2098             int added;
2099
2100             /* Find out wether this is a single entry or we have more than
2101                one consecutive entry.  */
2102             if (runp->mbnext != NULL
2103                 && runp->nmbs == runp->mbnext->nmbs
2104                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2105                 && (runp->mbs[runp->nmbs - 1]
2106                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2107               {
2108                 int i;
2109                 struct element_t *series_startp = runp;
2110                 struct element_t *curp;
2111
2112                 /* Compute how much space we will need.  */
2113                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2114                           + __alignof__ (int32_t) - 1)
2115                          & ~(__alignof__ (int32_t) - 1));
2116                 assert ((obstack_object_size (&extrapool)
2117                          & (__alignof__ (int32_t) - 1)) == 0);
2118                 obstack_make_room (&extrapool, added);
2119
2120                 /* More than one consecutive entry.  We mark this by having
2121                    a negative index into the indirect table.  */
2122                 obstack_int32_grow_fast (&extrapool,
2123                                          -(obstack_object_size (&indirectpool)
2124                                            / sizeof (int32_t)));
2125
2126                 /* Now search first the end of the series.  */
2127                 do
2128                   runp = runp->mbnext;
2129                 while (runp->mbnext != NULL
2130                        && runp->nmbs == runp->mbnext->nmbs
2131                        && memcmp (runp->mbs, runp->mbnext->mbs,
2132                                   runp->nmbs - 1) == 0
2133                        && (runp->mbs[runp->nmbs - 1]
2134                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2135
2136                 /* Now walk backward from here to the beginning.  */
2137                 curp = runp;
2138
2139                 assert (runp->nmbs <= 256);
2140                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2141                 for (i = 1; i < curp->nmbs; ++i)
2142                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2143
2144                 /* Now find the end of the consecutive sequence and
2145                    add all the indeces in the indirect pool.  */
2146                 do
2147                   {
2148                     weightidx = output_weight (&weightpool, collate, curp);
2149                     obstack_int32_grow (&indirectpool, weightidx);
2150
2151                     curp = curp->mblast;
2152                   }
2153                 while (curp != series_startp);
2154
2155                 /* Add the final weight.  */
2156                 weightidx = output_weight (&weightpool, collate, curp);
2157                 obstack_int32_grow (&indirectpool, weightidx);
2158
2159                 /* And add the end byte sequence.  Without length this
2160                    time.  */
2161                 for (i = 1; i < curp->nmbs; ++i)
2162                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2163               }
2164             else
2165               {
2166                 /* A single entry.  Simply add the index and the length and
2167                    string (except for the first character which is already
2168                    tested for).  */
2169                 int i;
2170
2171                 /* Output the weight info.  */
2172                 weightidx = output_weight (&weightpool, collate, runp);
2173
2174                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2175                           + __alignof__ (int32_t) - 1)
2176                          & ~(__alignof__ (int32_t) - 1));
2177                 assert ((obstack_object_size (&extrapool)
2178                          & (__alignof__ (int32_t) - 1)) == 0);
2179                 obstack_make_room (&extrapool, added);
2180
2181                 obstack_int32_grow_fast (&extrapool, weightidx);
2182                 assert (runp->nmbs <= 256);
2183                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2184
2185                 for (i = 1; i < runp->nmbs; ++i)
2186                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2187               }
2188
2189             /* Add alignment bytes if necessary.  */
2190             while ((obstack_object_size (&extrapool)
2191                     & (__alignof__ (int32_t) - 1)) != 0)
2192               obstack_1grow_fast (&extrapool, '\0');
2193
2194             /* Next entry.  */
2195             lastp = runp;
2196             runp = runp->mbnext;
2197           }
2198         while (runp != NULL);
2199
2200         assert ((obstack_object_size (&extrapool)
2201                  & (__alignof__ (int32_t) - 1)) == 0);
2202
2203         /* If the final entry in the list is not a single character we
2204            add an UNDEFINED entry here.  */
2205         if (lastp->nmbs != 1)
2206           {
2207             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2208                          & ~(__alignof__ (int32_t) - 1));
2209             obstack_make_room (&extrapool, added);
2210
2211             obstack_int32_grow_fast (&extrapool, 0);
2212             /* XXX What rule? We just pick the first.  */
2213             obstack_1grow_fast (&extrapool, 0);
2214             /* Length is zero.  */
2215             obstack_1grow_fast (&extrapool, 0);
2216
2217             /* Add alignment bytes if necessary.  */
2218             while ((obstack_object_size (&extrapool)
2219                     & (__alignof__ (int32_t) - 1)) != 0)
2220               obstack_1grow_fast (&extrapool, '\0');
2221           }
2222       }
2223
2224   /* Add padding to the tables if necessary.  */
2225   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2226          != 0)
2227     obstack_1grow (&weightpool, 0);
2228
2229   /* Now add the four tables.  */
2230   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2231   iov[2 + cnt].iov_base = tablemb;
2232   iov[2 + cnt].iov_len = sizeof (tablemb);
2233   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2234   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2235   ++cnt;
2236
2237   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2238   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2239   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2240   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2241   ++cnt;
2242
2243   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2244   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2245   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2246   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2247   ++cnt;
2248
2249   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2250   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2251   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2252   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2253   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2254   ++cnt;
2255
2256
2257   /* Now the same for the wide character table.  We need to store some
2258      more information here.  */
2259   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2260   iov[2 + cnt].iov_base = NULL;
2261   iov[2 + cnt].iov_len = 0;
2262   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2263   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2264   ++cnt;
2265
2266   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2267   iov[2 + cnt].iov_base = NULL;
2268   iov[2 + cnt].iov_len = 0;
2269   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2270   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2271   ++cnt;
2272
2273   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2274   iov[2 + cnt].iov_base = NULL;
2275   iov[2 + cnt].iov_len = 0;
2276   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2277   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2278   ++cnt;
2279
2280   /* Since we are using the sign of an integer to mark indirection the
2281      offsets in the arrays we are indirectly referring to must not be
2282      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2283   obstack_int32_grow (&extrapool, 0);
2284   obstack_int32_grow (&indirectpool, 0);
2285
2286   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2287      will probably be used more than once it is good to store the
2288      weights only once.  */
2289   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2290     abort ();
2291
2292   /* Generate the table.  Walk through the lists of sequences starting
2293      with the same wide character and add them one after the other to
2294      the table.  In case we have more than one sequence starting with
2295      the same byte we have to use extra indirection.  */
2296   {
2297     auto void add_to_tablewc (uint32_t ch, struct element_t *runp);
2298
2299     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2300       {
2301         if (runp->wcnext == NULL && runp->nwcs == 1)
2302           {
2303             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2304             collidx_table_add (&tablewc, ch, weigthidx);
2305           }
2306         else
2307           {
2308             /* As for the singlebyte table, we recognize sequences and
2309                compress them.  */
2310             struct element_t *lastp;
2311
2312             collidx_table_add (&tablewc, ch,
2313                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2314
2315             do
2316               {
2317                 /* Store the current index in the weight table.  We know that
2318                    the current position in the `extrapool' is aligned on a
2319                    32-bit address.  */
2320                 int32_t weightidx;
2321                 int added;
2322
2323                 /* Find out wether this is a single entry or we have more than
2324                    one consecutive entry.  */
2325                 if (runp->wcnext != NULL
2326                     && runp->nwcs == runp->wcnext->nwcs
2327                     && wmemcmp ((wchar_t *) runp->wcs,
2328                                 (wchar_t *)runp->wcnext->wcs,
2329                                 runp->nwcs - 1) == 0
2330                     && (runp->wcs[runp->nwcs - 1]
2331                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2332                   {
2333                     int i;
2334                     struct element_t *series_startp = runp;
2335                     struct element_t *curp;
2336
2337                     /* Now add first the initial byte sequence.  */
2338                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2339                     if (sizeof (int32_t) == sizeof (int))
2340                       obstack_make_room (&extrapool, added);
2341
2342                     /* More than one consecutive entry.  We mark this by having
2343                        a negative index into the indirect table.  */
2344                     obstack_int32_grow_fast (&extrapool,
2345                                              -(obstack_object_size (&indirectpool)
2346                                                / sizeof (int32_t)));
2347                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2348
2349                     do
2350                       runp = runp->wcnext;
2351                     while (runp->wcnext != NULL
2352                            && runp->nwcs == runp->wcnext->nwcs
2353                            && wmemcmp ((wchar_t *) runp->wcs,
2354                                        (wchar_t *)runp->wcnext->wcs,
2355                                        runp->nwcs - 1) == 0
2356                            && (runp->wcs[runp->nwcs - 1]
2357                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2358
2359                     /* Now walk backward from here to the beginning.  */
2360                     curp = runp;
2361
2362                     for (i = 1; i < runp->nwcs; ++i)
2363                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2364
2365                     /* Now find the end of the consecutive sequence and
2366                        add all the indeces in the indirect pool.  */
2367                     do
2368                       {
2369                         weightidx = output_weightwc (&weightpool, collate,
2370                                                      curp);
2371                         obstack_int32_grow (&indirectpool, weightidx);
2372
2373                         curp = curp->wclast;
2374                       }
2375                     while (curp != series_startp);
2376
2377                     /* Add the final weight.  */
2378                     weightidx = output_weightwc (&weightpool, collate, curp);
2379                     obstack_int32_grow (&indirectpool, weightidx);
2380
2381                     /* And add the end byte sequence.  Without length this
2382                        time.  */
2383                     for (i = 1; i < curp->nwcs; ++i)
2384                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2385                   }
2386                 else
2387                   {
2388                     /* A single entry.  Simply add the index and the length and
2389                        string (except for the first character which is already
2390                        tested for).  */
2391                     int i;
2392
2393                     /* Output the weight info.  */
2394                     weightidx = output_weightwc (&weightpool, collate, runp);
2395
2396                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2397                     if (sizeof (int) == sizeof (int32_t))
2398                       obstack_make_room (&extrapool, added);
2399
2400                     obstack_int32_grow_fast (&extrapool, weightidx);
2401                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2402                     for (i = 1; i < runp->nwcs; ++i)
2403                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2404                   }
2405
2406                 /* Next entry.  */
2407                 lastp = runp;
2408                 runp = runp->wcnext;
2409               }
2410             while (runp != NULL);
2411           }
2412       }
2413
2414     tablewc.p = 6;
2415     tablewc.q = 10;
2416     collidx_table_init (&tablewc);
2417
2418     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2419
2420     collidx_table_finalize (&tablewc);
2421   }
2422
2423   /* Now add the four tables.  */
2424   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2425   iov[2 + cnt].iov_base = tablewc.result;
2426   iov[2 + cnt].iov_len = tablewc.result_size;
2427   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2428   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2429   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2430   ++cnt;
2431
2432   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2433   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2434   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2435   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2436   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2437   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2438   ++cnt;
2439
2440   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2441   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2442   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2443   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2444   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2445   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2446   ++cnt;
2447
2448   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2449   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2450   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2451   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2452   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2453   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2454   ++cnt;
2455
2456
2457   /* Finally write the table with collation element names out.  It is
2458      a hash table with a simple function which gets the name of the
2459      character as the input.  One character might have many names.  The
2460      value associated with the name is an index into the weight table
2461      where we are then interested in the first-level weight value.
2462
2463      To determine how large the table should be we are counting the
2464      elements have to put in.  Since we are using internal chaining
2465      using a secondary hash function we have to make the table a bit
2466      larger to avoid extremely long search times.  We can achieve
2467      good results with a 40% larger table than there are entries.  */
2468   elem_size = 0;
2469   runp = collate->start;
2470   while (runp != NULL)
2471     {
2472       if (runp->mbs != NULL && runp->weights != NULL)
2473         /* Yep, the element really counts.  */
2474         ++elem_size;
2475
2476       runp = runp->next;
2477     }
2478   /* Add 40% and find the next prime number.  */
2479   elem_size = MIN (next_prime (elem_size * 1.4), 257);
2480
2481   /* Allocate the table.  Each entry consists of two words: the hash
2482      value and an index in a secondary table which provides the index
2483      into the weight table and the string itself (so that a match can
2484      be determined).  */
2485   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2486                                            elem_size * 2 * sizeof (uint32_t));
2487   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2488
2489   /* Now add the elements.  */
2490   runp = collate->start;
2491   while (runp != NULL)
2492     {
2493       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2494         {
2495           /* Compute the hash value of the name.  */
2496           uint32_t namelen = strlen (runp->name);
2497           uint32_t hash = elem_hash (runp->name, namelen);
2498           size_t idx = hash % elem_size;
2499
2500           if (elem_table[idx * 2] != 0)
2501             {
2502               /* The spot is already taken.  Try iterating using the value
2503                  from the secondary hashing function.  */
2504               size_t iter = hash % (elem_size - 2);
2505
2506               do
2507                 {
2508                   idx += iter;
2509                   if (idx >= elem_size)
2510                     idx -= elem_size;
2511                 }
2512               while (elem_table[idx * 2] != 0);
2513             }
2514           /* This is the spot where we will insert the value.  */
2515           elem_table[idx * 2] = hash;
2516           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2517
2518           /* Then the string itself including length.  */
2519           obstack_1grow (&extrapool, namelen);
2520           obstack_grow (&extrapool, runp->name, namelen);
2521
2522           /* And the multibyte representation.  */
2523           obstack_1grow (&extrapool, runp->nmbs);
2524           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2525
2526           /* And align again to 32 bits.  */
2527           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2528             obstack_grow (&extrapool, "\0\0",
2529                           (sizeof (int32_t)
2530                            - ((1 + namelen + 1 + runp->nmbs)
2531                               % sizeof (int32_t))));
2532
2533           /* Now some 32-bit values: multibyte collation sequence,
2534              wide char string (including length), and wide char
2535              collation sequence.  */
2536           obstack_int32_grow (&extrapool, runp->mbseqorder);
2537
2538           obstack_int32_grow (&extrapool, runp->nwcs);
2539           obstack_grow (&extrapool, runp->wcs,
2540                         runp->nwcs * sizeof (uint32_t));
2541
2542           obstack_int32_grow (&extrapool, runp->wcseqorder);
2543         }
2544
2545       runp = runp->next;
2546     }
2547
2548   /* Prepare to write out this data.  */
2549   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2550   iov[2 + cnt].iov_base = &elem_size;
2551   iov[2 + cnt].iov_len = sizeof (int32_t);
2552   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2553   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2554   ++cnt;
2555
2556   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2557   iov[2 + cnt].iov_base = elem_table;
2558   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2559   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2560   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2561   ++cnt;
2562
2563   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2564   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2565   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2566   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2567   ++cnt;
2568
2569   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2570   iov[2 + cnt].iov_base = collate->mbseqorder;
2571   iov[2 + cnt].iov_len = 256;
2572   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2573   ++cnt;
2574
2575   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2576   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2577   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2578   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2579   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2580   ++cnt;
2581
2582   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2583   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2584   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2585   ++cnt;
2586
2587   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2588
2589   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2590
2591   obstack_free (&weightpool, NULL);
2592   obstack_free (&extrapool, NULL);
2593   obstack_free (&indirectpool, NULL);
2594 }
2595
2596
2597 void
2598 collate_read (struct linereader *ldfile, struct localedef_t *result,
2599               const struct charmap_t *charmap, const char *repertoire_name,
2600               int ignore_content)
2601 {
2602   struct repertoire_t *repertoire = NULL;
2603   struct locale_collate_t *collate;
2604   struct token *now;
2605   struct token *arg = NULL;
2606   enum token_t nowtok;
2607   enum token_t was_ellipsis = tok_none;
2608   struct localedef_t *copy_locale = NULL;
2609   /* Parsing state:
2610      0 - start
2611      1 - between `order-start' and `order-end'
2612      2 - after `order-end'
2613      3 - after `reorder-after', waiting for `reorder-end'
2614      4 - after `reorder-end'
2615      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2616      6 - after `reorder-sections-end'
2617   */
2618   int state = 0;
2619
2620   /* Get the repertoire we have to use.  */
2621   if (repertoire_name != NULL)
2622     repertoire = repertoire_read (repertoire_name);
2623
2624   /* The rest of the line containing `LC_COLLATE' must be free.  */
2625   lr_ignore_rest (ldfile, 1);
2626
2627   do
2628     {
2629       now = lr_token (ldfile, charmap, result, NULL, verbose);
2630       nowtok = now->tok;
2631     }
2632   while (nowtok == tok_eol);
2633
2634   if (nowtok == tok_copy)
2635     {
2636       state = 2;
2637       now = lr_token (ldfile, charmap, result, NULL, verbose);
2638       if (now->tok != tok_string)
2639         {
2640           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2641
2642         skip_category:
2643           do
2644             now = lr_token (ldfile, charmap, result, NULL, verbose);
2645           while (now->tok != tok_eof && now->tok != tok_end);
2646
2647           if (now->tok != tok_eof
2648               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2649                   now->tok == tok_eof))
2650             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2651           else if (now->tok != tok_lc_collate)
2652             {
2653               lr_error (ldfile, _("\
2654 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2655               lr_ignore_rest (ldfile, 0);
2656             }
2657           else
2658             lr_ignore_rest (ldfile, 1);
2659
2660           return;
2661         }
2662
2663       if (! ignore_content)
2664         {
2665           /* Get the locale definition.  */
2666           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2667                                      repertoire_name, charmap, NULL);
2668           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2669             {
2670               /* Not yet loaded.  So do it now.  */
2671               if (locfile_read (copy_locale, charmap) != 0)
2672                 goto skip_category;
2673             }
2674         }
2675
2676       lr_ignore_rest (ldfile, 1);
2677
2678       now = lr_token (ldfile, charmap, result, NULL, verbose);
2679       nowtok = now->tok;
2680     }
2681
2682   /* Prepare the data structures.  */
2683   collate_startup (ldfile, result, copy_locale, ignore_content);
2684   collate = result->categories[LC_COLLATE].collate;
2685
2686   while (1)
2687     {
2688       char ucs4buf[10];
2689       char *symstr;
2690       size_t symlen;
2691
2692       /* Of course we don't proceed beyond the end of file.  */
2693       if (nowtok == tok_eof)
2694         break;
2695
2696       /* Ignore empty lines.  */
2697       if (nowtok == tok_eol)
2698         {
2699           now = lr_token (ldfile, charmap, result, NULL, verbose);
2700           nowtok = now->tok;
2701           continue;
2702         }
2703
2704       switch (nowtok)
2705         {
2706         case tok_copy:
2707           /* Allow copying other locales.  */
2708           now = lr_token (ldfile, charmap, result, NULL, verbose);
2709           if (now->tok != tok_string)
2710             goto err_label;
2711
2712           if (! ignore_content)
2713             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2714                          charmap, result);
2715
2716           lr_ignore_rest (ldfile, 1);
2717           break;
2718
2719         case tok_coll_weight_max:
2720           /* Ignore the rest of the line if we don't need the input of
2721              this line.  */
2722           if (ignore_content)
2723             {
2724               lr_ignore_rest (ldfile, 0);
2725               break;
2726             }
2727
2728           if (state != 0)
2729             goto err_label;
2730
2731           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2732           if (arg->tok != tok_number)
2733             goto err_label;
2734           if (collate->col_weight_max != -1)
2735             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2736                       "LC_COLLATE", "col_weight_max");
2737           else
2738             collate->col_weight_max = arg->val.num;
2739           lr_ignore_rest (ldfile, 1);
2740           break;
2741
2742         case tok_section_symbol:
2743           /* Ignore the rest of the line if we don't need the input of
2744              this line.  */
2745           if (ignore_content)
2746             {
2747               lr_ignore_rest (ldfile, 0);
2748               break;
2749             }
2750
2751           if (state != 0)
2752             goto err_label;
2753
2754           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2755           if (arg->tok != tok_bsymbol)
2756             goto err_label;
2757           else if (!ignore_content)
2758             {
2759               /* Check whether this section is already known.  */
2760               struct section_list *known = collate->sections;
2761               while (known != NULL)
2762                 {
2763                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2764                     break;
2765                   known = known->next;
2766                 }
2767
2768               if (known != NULL)
2769                 {
2770                   lr_error (ldfile,
2771                             _("%s: duplicate declaration of section `%s'"),
2772                             "LC_COLLATE", arg->val.str.startmb);
2773                   free (arg->val.str.startmb);
2774                 }
2775               else
2776                 collate->sections = make_seclist_elem (collate,
2777                                                        arg->val.str.startmb,
2778                                                        collate->sections);
2779
2780               lr_ignore_rest (ldfile, known == NULL);
2781             }
2782           else
2783             {
2784               free (arg->val.str.startmb);
2785               lr_ignore_rest (ldfile, 0);
2786             }
2787           break;
2788
2789         case tok_collating_element:
2790           /* Ignore the rest of the line if we don't need the input of
2791              this line.  */
2792           if (ignore_content)
2793             {
2794               lr_ignore_rest (ldfile, 0);
2795               break;
2796             }
2797
2798           if (state != 0 && state != 2)
2799             goto err_label;
2800
2801           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2802           if (arg->tok != tok_bsymbol)
2803             goto err_label;
2804           else
2805             {
2806               const char *symbol = arg->val.str.startmb;
2807               size_t symbol_len = arg->val.str.lenmb;
2808
2809               /* Next the `from' keyword.  */
2810               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2811               if (arg->tok != tok_from)
2812                 {
2813                   free ((char *) symbol);
2814                   goto err_label;
2815                 }
2816
2817               ldfile->return_widestr = 1;
2818               ldfile->translate_strings = 1;
2819
2820               /* Finally the string with the replacement.  */
2821               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2822
2823               ldfile->return_widestr = 0;
2824               ldfile->translate_strings = 0;
2825
2826               if (arg->tok != tok_string)
2827                 goto err_label;
2828
2829               if (!ignore_content && symbol != NULL)
2830                 {
2831                   /* The name is already defined.  */
2832                   if (check_duplicate (ldfile, collate, charmap,
2833                                        repertoire, symbol, symbol_len))
2834                     goto col_elem_free;
2835
2836                   if (arg->val.str.startmb != NULL)
2837                     insert_entry (&collate->elem_table, symbol, symbol_len,
2838                                   new_element (collate,
2839                                                arg->val.str.startmb,
2840                                                arg->val.str.lenmb - 1,
2841                                                arg->val.str.startwc,
2842                                                symbol, symbol_len, 0));
2843                 }
2844               else
2845                 {
2846                 col_elem_free:
2847                   if (symbol != NULL)
2848                     free ((char *) symbol);
2849                   if (arg->val.str.startmb != NULL)
2850                     free (arg->val.str.startmb);
2851                   if (arg->val.str.startwc != NULL)
2852                     free (arg->val.str.startwc);
2853                 }
2854               lr_ignore_rest (ldfile, 1);
2855             }
2856           break;
2857
2858         case tok_collating_symbol:
2859           /* Ignore the rest of the line if we don't need the input of
2860              this line.  */
2861           if (ignore_content)
2862             {
2863               lr_ignore_rest (ldfile, 0);
2864               break;
2865             }
2866
2867           if (state != 0 && state != 2)
2868             goto err_label;
2869
2870           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2871           if (arg->tok != tok_bsymbol)
2872             goto err_label;
2873           else
2874             {
2875               char *symbol = arg->val.str.startmb;
2876               size_t symbol_len = arg->val.str.lenmb;
2877               char *endsymbol = NULL;
2878               size_t endsymbol_len = 0;
2879               enum token_t ellipsis = tok_none;
2880
2881               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2882               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2883                 {
2884                   ellipsis = arg->tok;
2885
2886                   arg = lr_token (ldfile, charmap, result, repertoire,
2887                                   verbose);
2888                   if (arg->tok != tok_bsymbol)
2889                     {
2890                       free (symbol);
2891                       goto err_label;
2892                     }
2893
2894                   endsymbol = arg->val.str.startmb;
2895                   endsymbol_len = arg->val.str.lenmb;
2896
2897                   lr_ignore_rest (ldfile, 1);
2898                 }
2899               else if (arg->tok != tok_eol)
2900                 {
2901                   free (symbol);
2902                   goto err_label;
2903                 }
2904
2905               if (!ignore_content)
2906                 {
2907                   if (symbol == NULL
2908                       || (ellipsis != tok_none && endsymbol == NULL))
2909                     {
2910                       lr_error (ldfile, _("\
2911 %s: unknown character in collating symbol name"),
2912                                 "LC_COLLATE");
2913                       goto col_sym_free;
2914                     }
2915                   else if (ellipsis == tok_none)
2916                     {
2917                       /* A single symbol, no ellipsis.  */
2918                       if (check_duplicate (ldfile, collate, charmap,
2919                                            repertoire, symbol, symbol_len))
2920                         /* The name is already defined.  */
2921                         goto col_sym_free;
2922
2923                       insert_entry (&collate->sym_table, symbol, symbol_len,
2924                                     new_symbol (collate, symbol, symbol_len));
2925                     }
2926                   else if (symbol_len != endsymbol_len)
2927                     {
2928                     col_sym_inv_range:
2929                       lr_error (ldfile,
2930                                 _("invalid names for character range"));
2931                       goto col_sym_free;
2932                     }
2933                   else
2934                     {
2935                       /* Oh my, we have to handle an ellipsis.  First, as
2936                          usual, determine the common prefix and then
2937                          convert the rest into a range.  */
2938                       size_t prefixlen;
2939                       unsigned long int from;
2940                       unsigned long int to;
2941                       char *endp;
2942
2943                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2944                         if (symbol[prefixlen] != endsymbol[prefixlen])
2945                           break;
2946
2947                       /* Convert the rest into numbers.  */
2948                       symbol[symbol_len] = '\0';
2949                       from = strtoul (&symbol[prefixlen], &endp,
2950                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2951                       if (*endp != '\0')
2952                         goto col_sym_inv_range;
2953
2954                       endsymbol[symbol_len] = '\0';
2955                       to = strtoul (&endsymbol[prefixlen], &endp,
2956                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2957                       if (*endp != '\0')
2958                         goto col_sym_inv_range;
2959
2960                       if (from > to)
2961                         goto col_sym_inv_range;
2962
2963                       /* Now loop over all entries.  */
2964                       while (from <= to)
2965                         {
2966                           char *symbuf;
2967
2968                           symbuf = (char *) obstack_alloc (&collate->mempool,
2969                                                            symbol_len + 1);
2970
2971                           /* Create the name.  */
2972                           sprintf (symbuf,
2973                                    ellipsis == tok_ellipsis2
2974                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2975                                    (int) prefixlen, symbol,
2976                                    (int) (symbol_len - prefixlen), from);
2977
2978                           if (check_duplicate (ldfile, collate, charmap,
2979                                                repertoire, symbuf, symbol_len))
2980                             /* The name is already defined.  */
2981                             goto col_sym_free;
2982
2983                           insert_entry (&collate->sym_table, symbuf,
2984                                         symbol_len,
2985                                         new_symbol (collate, symbuf,
2986                                                     symbol_len));
2987
2988                           /* Increment the counter.  */
2989                           ++from;
2990                         }
2991
2992                       goto col_sym_free;
2993                     }
2994                 }
2995               else
2996                 {
2997                 col_sym_free:
2998                   if (symbol != NULL)
2999                     free (symbol);
3000                   if (endsymbol != NULL)
3001                     free (endsymbol);
3002                 }
3003             }
3004           break;
3005
3006         case tok_symbol_equivalence:
3007           /* Ignore the rest of the line if we don't need the input of
3008              this line.  */
3009           if (ignore_content)
3010             {
3011               lr_ignore_rest (ldfile, 0);
3012               break;
3013             }
3014
3015           if (state != 0)
3016             goto err_label;
3017
3018           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3019           if (arg->tok != tok_bsymbol)
3020             goto err_label;
3021           else
3022             {
3023               const char *newname = arg->val.str.startmb;
3024               size_t newname_len = arg->val.str.lenmb;
3025               const char *symname;
3026               size_t symname_len;
3027               void *symval;     /* Actually struct symbol_t*  */
3028
3029               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3030               if (arg->tok != tok_bsymbol)
3031                 {
3032                   if (newname != NULL)
3033                     free ((char *) newname);
3034                   goto err_label;
3035                 }
3036
3037               symname = arg->val.str.startmb;
3038               symname_len = arg->val.str.lenmb;
3039
3040               if (newname == NULL)
3041                 {
3042                   lr_error (ldfile, _("\
3043 %s: unknown character in equivalent definition name"),
3044                             "LC_COLLATE");
3045
3046                 sym_equiv_free:
3047                   if (newname != NULL)
3048                     free ((char *) newname);
3049                   if (symname != NULL)
3050                     free ((char *) symname);
3051                   break;
3052                 }
3053               if (symname == NULL)
3054                 {
3055                   lr_error (ldfile, _("\
3056 %s: unknown character in equivalent definition value"),
3057                             "LC_COLLATE");
3058                   goto sym_equiv_free;
3059                 }
3060
3061               /* See whether the symbol name is already defined.  */
3062               if (find_entry (&collate->sym_table, symname, symname_len,
3063                               &symval) != 0)
3064                 {
3065                   lr_error (ldfile, _("\
3066 %s: unknown symbol `%s' in equivalent definition"),
3067                             "LC_COLLATE", symname);
3068                   goto col_sym_free;
3069                 }
3070
3071               if (insert_entry (&collate->sym_table,
3072                                 newname, newname_len, symval) < 0)
3073                 {
3074                   lr_error (ldfile, _("\
3075 error while adding equivalent collating symbol"));
3076                   goto sym_equiv_free;
3077                 }
3078
3079               free ((char *) symname);
3080             }
3081           lr_ignore_rest (ldfile, 1);
3082           break;
3083
3084         case tok_script:
3085           /* We get told about the scripts we know.  */
3086           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3087           if (arg->tok != tok_bsymbol)
3088             goto err_label;
3089           else
3090             {
3091               struct section_list *runp = collate->known_sections;
3092               char *name;
3093
3094               while (runp != NULL)
3095                 if (strncmp (runp->name, arg->val.str.startmb,
3096                              arg->val.str.lenmb) == 0
3097                     && runp->name[arg->val.str.lenmb] == '\0')
3098                   break;
3099                 else
3100                   runp = runp->def_next;
3101
3102               if (runp != NULL)
3103                 {
3104                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3105                             runp->name);
3106                   lr_ignore_rest (ldfile, 0);
3107                   break;
3108                 }
3109
3110               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3111               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3112               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3113               name[arg->val.str.lenmb] = '\0';
3114               runp->name = name;
3115
3116               runp->def_next = collate->known_sections;
3117               collate->known_sections = runp;
3118             }
3119           lr_ignore_rest (ldfile, 1);
3120           break;
3121
3122         case tok_order_start:
3123           /* Ignore the rest of the line if we don't need the input of
3124              this line.  */
3125           if (ignore_content)
3126             {
3127               lr_ignore_rest (ldfile, 0);
3128               break;
3129             }
3130
3131           if (state != 0 && state != 1)
3132             goto err_label;
3133           state = 1;
3134
3135           /* The 14652 draft does not specify whether all `order_start' lines
3136              must contain the same number of sort-rules, but 14651 does.  So
3137              we require this here as well.  */
3138           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3139           if (arg->tok == tok_bsymbol)
3140             {
3141               /* This better should be a section name.  */
3142               struct section_list *sp = collate->known_sections;
3143               while (sp != NULL
3144                      && (sp->name == NULL
3145                          || strncmp (sp->name, arg->val.str.startmb,
3146                                      arg->val.str.lenmb) != 0
3147                          || sp->name[arg->val.str.lenmb] != '\0'))
3148                 sp = sp->def_next;
3149
3150               if (sp == NULL)
3151                 {
3152                   lr_error (ldfile, _("\
3153 %s: unknown section name `%s'"),
3154                             "LC_COLLATE", arg->val.str.startmb);
3155                   /* We use the error section.  */
3156                   collate->current_section = &collate->error_section;
3157
3158                   if (collate->error_section.first == NULL)
3159                     {
3160                       /* Insert &collate->error_section at the end of
3161                          the collate->sections list.  */
3162                       if (collate->sections == NULL)
3163                         collate->sections = &collate->error_section;
3164                       else
3165                         {
3166                           sp = collate->sections;
3167                           while (sp->next != NULL)
3168                             sp = sp->next;
3169
3170                           sp->next = &collate->error_section;
3171                         }
3172                       collate->error_section.next = NULL;
3173                     }
3174                 }
3175               else
3176                 {
3177                   /* One should not be allowed to open the same
3178                      section twice.  */
3179                   if (sp->first != NULL)
3180                     lr_error (ldfile, _("\
3181 %s: multiple order definitions for section `%s'"),
3182                               "LC_COLLATE", sp->name);
3183                   else
3184                     {
3185                       /* Insert sp in the collate->sections list,
3186                          right after collate->current_section.  */
3187                       if (collate->current_section == NULL)
3188                         collate->current_section = sp;
3189                       else
3190                         {
3191                           sp->next = collate->current_section->next;
3192                           collate->current_section->next = sp;
3193                         }
3194                     }
3195
3196                   /* Next should come the end of the line or a semicolon.  */
3197                   arg = lr_token (ldfile, charmap, result, repertoire,
3198                                   verbose);
3199                   if (arg->tok == tok_eol)
3200                     {
3201                       uint32_t cnt;
3202
3203                       /* This means we have exactly one rule: `forward'.  */
3204                       if (nrules > 1)
3205                         lr_error (ldfile, _("\
3206 %s: invalid number of sorting rules"),
3207                                   "LC_COLLATE");
3208                       else
3209                         nrules = 1;
3210                       sp->rules = obstack_alloc (&collate->mempool,
3211                                                  (sizeof (enum coll_sort_rule)
3212                                                   * nrules));
3213                       for (cnt = 0; cnt < nrules; ++cnt)
3214                         sp->rules[cnt] = sort_forward;
3215
3216                       /* Next line.  */
3217                       break;
3218                     }
3219
3220                   /* Get the next token.  */
3221                   arg = lr_token (ldfile, charmap, result, repertoire,
3222                                   verbose);
3223                 }
3224             }
3225           else
3226             {
3227               /* There is no section symbol.  Therefore we use the unnamed
3228                  section.  */
3229               collate->current_section = &collate->unnamed_section;
3230
3231               if (collate->unnamed_section.first != NULL)
3232                 lr_error (ldfile, _("\
3233 %s: multiple order definitions for unnamed section"),
3234                           "LC_COLLATE");
3235               else
3236                 {
3237                   /* Insert &collate->unnamed_section at the beginning of
3238                      the collate->sections list.  */
3239                   collate->unnamed_section.next = collate->sections;
3240                   collate->sections = &collate->unnamed_section;
3241                 }
3242             }
3243
3244           /* Now read the direction names.  */
3245           read_directions (ldfile, arg, charmap, repertoire, result);
3246
3247           /* From now we need the strings untranslated.  */
3248           ldfile->translate_strings = 0;
3249           break;
3250
3251         case tok_order_end:
3252           /* Ignore the rest of the line if we don't need the input of
3253              this line.  */
3254           if (ignore_content)
3255             {
3256               lr_ignore_rest (ldfile, 0);
3257               break;
3258             }
3259
3260           if (state != 1)
3261             goto err_label;
3262
3263           /* Handle ellipsis at end of list.  */
3264           if (was_ellipsis != tok_none)
3265             {
3266               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3267                                repertoire, result);
3268               was_ellipsis = tok_none;
3269             }
3270
3271           state = 2;
3272           lr_ignore_rest (ldfile, 1);
3273           break;
3274
3275         case tok_reorder_after:
3276           /* Ignore the rest of the line if we don't need the input of
3277              this line.  */
3278           if (ignore_content)
3279             {
3280               lr_ignore_rest (ldfile, 0);
3281               break;
3282             }
3283
3284           if (state == 1)
3285             {
3286               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3287                         "LC_COLLATE");
3288               state = 2;
3289
3290               /* Handle ellipsis at end of list.  */
3291               if (was_ellipsis != tok_none)
3292                 {
3293                   handle_ellipsis (ldfile, arg->val.str.startmb,
3294                                    arg->val.str.lenmb, was_ellipsis, charmap,
3295                                    repertoire, result);
3296                   was_ellipsis = tok_none;
3297                 }
3298             }
3299           else if (state != 2 && state != 3)
3300             goto err_label;
3301           state = 3;
3302
3303           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3304           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3305             {
3306               /* Find this symbol in the sequence table.  */
3307               char ucsbuf[10];
3308               char *startmb;
3309               size_t lenmb;
3310               struct element_t *insp;
3311               int no_error = 1;
3312               void *ptr;
3313
3314               if (arg->tok == tok_bsymbol)
3315                 {
3316                   startmb = arg->val.str.startmb;
3317                   lenmb = arg->val.str.lenmb;
3318                 }
3319               else
3320                 {
3321                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3322                   startmb = ucsbuf;
3323                   lenmb = 9;
3324                 }
3325
3326               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3327                 /* Yes, the symbol exists.  Simply point the cursor
3328                    to it.  */
3329                 collate->cursor = (struct element_t *) ptr;
3330               else
3331                 {
3332                   struct symbol_t *symbp;
3333                   void *ptr;
3334
3335                   if (find_entry (&collate->sym_table, startmb, lenmb,
3336                                   &ptr) == 0)
3337                     {
3338                       symbp = ptr;
3339
3340                       if (symbp->order->last != NULL
3341                           || symbp->order->next != NULL)
3342                         collate->cursor = symbp->order;
3343                       else
3344                         {
3345                           /* This is a collating symbol but its position
3346                              is not yet defined.  */
3347                           lr_error (ldfile, _("\
3348 %s: order for collating symbol %.*s not yet defined"),
3349                                     "LC_COLLATE", (int) lenmb, startmb);
3350                           collate->cursor = NULL;
3351                           no_error = 0;
3352                         }
3353                     }
3354                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3355                                        &ptr) == 0)
3356                     {
3357                       insp = (struct element_t *) ptr;
3358
3359                       if (insp->last != NULL || insp->next != NULL)
3360                         collate->cursor = insp;
3361                       else
3362                         {
3363                           /* This is a collating element but its position
3364                              is not yet defined.  */
3365                           lr_error (ldfile, _("\
3366 %s: order for collating element %.*s not yet defined"),
3367                                     "LC_COLLATE", (int) lenmb, startmb);
3368                           collate->cursor = NULL;
3369                           no_error = 0;
3370                         }
3371                     }
3372                   else
3373                     {
3374                       /* This is bad.  The symbol after which we have to
3375                          insert does not exist.  */
3376                       lr_error (ldfile, _("\
3377 %s: cannot reorder after %.*s: symbol not known"),
3378                                 "LC_COLLATE", (int) lenmb, startmb);
3379                       collate->cursor = NULL;
3380                       no_error = 0;
3381                     }
3382                 }
3383
3384               lr_ignore_rest (ldfile, no_error);
3385             }
3386           else
3387             /* This must not happen.  */
3388             goto err_label;
3389           break;
3390
3391         case tok_reorder_end:
3392           /* Ignore the rest of the line if we don't need the input of
3393              this line.  */
3394           if (ignore_content)
3395             break;
3396
3397           if (state != 3)
3398             goto err_label;
3399           state = 4;
3400           lr_ignore_rest (ldfile, 1);
3401           break;
3402
3403         case tok_reorder_sections_after:
3404           /* Ignore the rest of the line if we don't need the input of
3405              this line.  */
3406           if (ignore_content)
3407             {
3408               lr_ignore_rest (ldfile, 0);
3409               break;
3410             }
3411
3412           if (state == 1)
3413             {
3414               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3415                         "LC_COLLATE");
3416               state = 2;
3417
3418               /* Handle ellipsis at end of list.  */
3419               if (was_ellipsis != tok_none)
3420                 {
3421                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3422                                    repertoire, result);
3423                   was_ellipsis = tok_none;
3424                 }
3425             }
3426           else if (state == 3)
3427             {
3428               WITH_CUR_LOCALE (error (0, 0, _("\
3429 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3430               state = 4;
3431             }
3432           else if (state != 2 && state != 4)
3433             goto err_label;
3434           state = 5;
3435
3436           /* Get the name of the sections we are adding after.  */
3437           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3438           if (arg->tok == tok_bsymbol)
3439             {
3440               /* Now find a section with this name.  */
3441               struct section_list *runp = collate->sections;
3442
3443               while (runp != NULL)
3444                 {
3445                   if (runp->name != NULL
3446                       && strlen (runp->name) == arg->val.str.lenmb
3447                       && memcmp (runp->name, arg->val.str.startmb,
3448                                  arg->val.str.lenmb) == 0)
3449                     break;
3450
3451                   runp = runp->next;
3452                 }
3453
3454               if (runp != NULL)
3455                 collate->current_section = runp;
3456               else
3457                 {
3458                   /* This is bad.  The section after which we have to
3459                      reorder does not exist.  Therefore we cannot
3460                      process the whole rest of this reorder
3461                      specification.  */
3462                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3463                             "LC_COLLATE", (int) arg->val.str.lenmb,
3464                             arg->val.str.startmb);
3465
3466                   do
3467                     {
3468                       lr_ignore_rest (ldfile, 0);
3469
3470                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3471                     }
3472                   while (now->tok == tok_reorder_sections_after
3473                          || now->tok == tok_reorder_sections_end
3474                          || now->tok == tok_end);
3475
3476                   /* Process the token we just saw.  */
3477                   nowtok = now->tok;
3478                   continue;
3479                 }
3480             }
3481           else
3482             /* This must not happen.  */
3483             goto err_label;
3484           break;
3485
3486         case tok_reorder_sections_end:
3487           /* Ignore the rest of the line if we don't need the input of
3488              this line.  */
3489           if (ignore_content)
3490             break;
3491
3492           if (state != 5)
3493             goto err_label;
3494           state = 6;
3495           lr_ignore_rest (ldfile, 1);
3496           break;
3497
3498         case tok_bsymbol:
3499         case tok_ucs4:
3500           /* Ignore the rest of the line if we don't need the input of
3501              this line.  */
3502           if (ignore_content)
3503             {
3504               lr_ignore_rest (ldfile, 0);
3505               break;
3506             }
3507
3508           if (state != 0 && state != 1 && state != 3 && state != 5)
3509             goto err_label;
3510
3511           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3512             goto err_label;
3513
3514           if (nowtok == tok_ucs4)
3515             {
3516               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3517               symstr = ucs4buf;
3518               symlen = 9;
3519             }
3520           else if (arg != NULL)
3521             {
3522               symstr = arg->val.str.startmb;
3523               symlen = arg->val.str.lenmb;
3524             }
3525           else
3526             {
3527               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3528                         (int) ldfile->token.val.str.lenmb,
3529                         ldfile->token.val.str.startmb);
3530               break;
3531             }
3532
3533           if (state == 0)
3534             {
3535               /* We are outside an `order_start' region.  This means
3536                  we must only accept definitions of values for
3537                  collation symbols since these are purely abstract
3538                  values and don't need directions associated.  */
3539               struct element_t *seqp;
3540               void *ptr;
3541
3542               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3543                 {
3544                   seqp = ptr;
3545
3546                   /* It's already defined.  First check whether this
3547                      is really a collating symbol.  */
3548                   if (seqp->is_character)
3549                     goto err_label;
3550
3551                   goto move_entry;
3552                 }
3553               else
3554                 {
3555                   void *result;
3556
3557                   if (find_entry (&collate->sym_table, symstr, symlen,
3558                                   &result) != 0)
3559                     /* No collating symbol, it's an error.  */
3560                     goto err_label;
3561
3562                   /* Maybe this is the first time we define a symbol
3563                      value and it is before the first actual section.  */
3564                   if (collate->sections == NULL)
3565                     collate->sections = collate->current_section =
3566                       &collate->symbol_section;
3567                 }
3568
3569               if (was_ellipsis != tok_none)
3570                 {
3571
3572                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3573                                    charmap, repertoire, result);
3574
3575                   /* Remember that we processed the ellipsis.  */
3576                   was_ellipsis = tok_none;
3577
3578                   /* And don't add the value a second time.  */
3579                   break;
3580                 }
3581             }
3582           else if (state == 3)
3583             {
3584               /* It is possible that we already have this collation sequence.
3585                  In this case we move the entry.  */
3586               struct element_t *seqp = NULL;
3587               void *sym;
3588               void *ptr;
3589
3590               /* If the symbol after which we have to insert was not found
3591                  ignore all entries.  */
3592               if (collate->cursor == NULL)
3593                 {
3594                   lr_ignore_rest (ldfile, 0);
3595                   break;
3596                 }
3597
3598               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3599                 {
3600                   seqp = (struct element_t *) ptr;
3601                   goto move_entry;
3602                 }
3603
3604               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3605                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3606                 goto move_entry;
3607
3608               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3609                   && (seqp = (struct element_t *) ptr,
3610                       seqp->last != NULL || seqp->next != NULL
3611                       || (collate->start != NULL && seqp == collate->start)))
3612                 {
3613                 move_entry:
3614                   /* Remove the entry from the old position.  */
3615                   if (seqp->last == NULL)
3616                     collate->start = seqp->next;
3617                   else
3618                     seqp->last->next = seqp->next;
3619                   if (seqp->next != NULL)
3620                     seqp->next->last = seqp->last;
3621
3622                   /* We also have to check whether this entry is the
3623                      first or last of a section.  */
3624                   if (seqp->section->first == seqp)
3625                     {
3626                       if (seqp->section->first == seqp->section->last)
3627                         /* This section has no content anymore.  */
3628                         seqp->section->first = seqp->section->last = NULL;
3629                       else
3630                         seqp->section->first = seqp->next;
3631                     }
3632                   else if (seqp->section->last == seqp)
3633                     seqp->section->last = seqp->last;
3634
3635                   /* Now insert it in the new place.  */
3636                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3637                                   tok_none);
3638                   break;
3639                 }
3640
3641               /* Otherwise we just add a new entry.  */
3642             }
3643           else if (state == 5)
3644             {
3645               /* We are reordering sections.  Find the named section.  */
3646               struct section_list *runp = collate->sections;
3647               struct section_list *prevp = NULL;
3648
3649               while (runp != NULL)
3650                 {
3651                   if (runp->name != NULL
3652                       && strlen (runp->name) == symlen
3653                       && memcmp (runp->name, symstr, symlen) == 0)
3654                     break;
3655
3656                   prevp = runp;
3657                   runp = runp->next;
3658                 }
3659
3660               if (runp == NULL)
3661                 {
3662                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3663                             "LC_COLLATE", (int) symlen, symstr);
3664                   lr_ignore_rest (ldfile, 0);
3665                 }
3666               else
3667                 {
3668                   if (runp != collate->current_section)
3669                     {
3670                       /* Remove the named section from the old place and
3671                          insert it in the new one.  */
3672                       prevp->next = runp->next;
3673
3674                       runp->next = collate->current_section->next;
3675                       collate->current_section->next = runp;
3676                       collate->current_section = runp;
3677                     }
3678
3679                   /* Process the rest of the line which might change
3680                      the collation rules.  */
3681                   arg = lr_token (ldfile, charmap, result, repertoire,
3682                                   verbose);
3683                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3684                     read_directions (ldfile, arg, charmap, repertoire,
3685                                      result);
3686                 }
3687               break;
3688             }
3689           else if (was_ellipsis != tok_none)
3690             {
3691               /* Using the information in the `ellipsis_weight'
3692                  element and this and the last value we have to handle
3693                  the ellipsis now.  */
3694               assert (state == 1);
3695
3696               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3697                                repertoire, result);
3698
3699               /* Remember that we processed the ellipsis.  */
3700               was_ellipsis = tok_none;
3701
3702               /* And don't add the value a second time.  */
3703               break;
3704             }
3705
3706           /* Now insert in the new place.  */
3707           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3708           break;
3709
3710         case tok_undefined:
3711           /* Ignore the rest of the line if we don't need the input of
3712              this line.  */
3713           if (ignore_content)
3714             {
3715               lr_ignore_rest (ldfile, 0);
3716               break;
3717             }
3718
3719           if (state != 1)
3720             goto err_label;
3721
3722           if (was_ellipsis != tok_none)
3723             {
3724               lr_error (ldfile,
3725                         _("%s: cannot have `%s' as end of ellipsis range"),
3726                         "LC_COLLATE", "UNDEFINED");
3727
3728               unlink_element (collate);
3729               was_ellipsis = tok_none;
3730             }
3731
3732           /* See whether UNDEFINED already appeared somewhere.  */
3733           if (collate->undefined.next != NULL
3734               || &collate->undefined == collate->cursor)
3735             {
3736               lr_error (ldfile,
3737                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3738                         "LC_COLLATE", 9, "UNDEFINED",
3739                         collate->undefined.file,
3740                         collate->undefined.line);
3741               lr_ignore_rest (ldfile, 0);
3742             }
3743           else
3744             /* Parse the weights.  */
3745              insert_weights (ldfile, &collate->undefined, charmap,
3746                              repertoire, result, tok_none);
3747           break;
3748
3749         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3750         case tok_ellipsis3: /* absolute ellipsis */
3751         case tok_ellipsis4: /* symbolic decimal ellipsis */
3752           /* This is the symbolic (decimal or hexadecimal) or absolute
3753              ellipsis.  */
3754           if (was_ellipsis != tok_none)
3755             goto err_label;
3756
3757           if (state != 0 && state != 1 && state != 3)
3758             goto err_label;
3759
3760           was_ellipsis = nowtok;
3761
3762           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3763                           repertoire, result, nowtok);
3764           break;
3765
3766         case tok_end:
3767           /* Next we assume `LC_COLLATE'.  */
3768           if (!ignore_content)
3769             {
3770               if (state == 0)
3771                 /* We must either see a copy statement or have
3772                    ordering values.  */
3773                 lr_error (ldfile,
3774                           _("%s: empty category description not allowed"),
3775                           "LC_COLLATE");
3776               else if (state == 1)
3777                 {
3778                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3779                             "LC_COLLATE");
3780
3781                   /* Handle ellipsis at end of list.  */
3782                   if (was_ellipsis != tok_none)
3783                     {
3784                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3785                                        repertoire, result);
3786                       was_ellipsis = tok_none;
3787                     }
3788                 }
3789               else if (state == 3)
3790                 WITH_CUR_LOCALE (error (0, 0, _("\
3791 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3792               else if (state == 5)
3793                 WITH_CUR_LOCALE (error (0, 0, _("\
3794 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3795             }
3796           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3797           if (arg->tok == tok_eof)
3798             break;
3799           if (arg->tok == tok_eol)
3800             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3801           else if (arg->tok != tok_lc_collate)
3802             lr_error (ldfile, _("\
3803 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3804           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3805           return;
3806
3807         default:
3808         err_label:
3809           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3810         }
3811
3812       /* Prepare for the next round.  */
3813       now = lr_token (ldfile, charmap, result, NULL, verbose);
3814       nowtok = now->tok;
3815     }
3816
3817   /* When we come here we reached the end of the file.  */
3818   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3819 }