locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <errno.h>
  25 #include <error.h>
  26 #include <stdlib.h>
  27 #include <wchar.h>
  28 #include <sys/param.h>
  29
  30 #include "localedef.h"
  31 #include "charmap.h"
  32 #include "localeinfo.h"
  33 #include "linereader.h"
  34 #include "locfile.h"
  35 #include "elem-hash.h"
  36
  37 /* Uncomment the following line in the production version.  */
  38 /* #define NDEBUG 1 */
  39 #include <assert.h>
  40
  41 #define obstack_chunk_alloc malloc
  42 #define obstack_chunk_free free
  43
  44 static inline void
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  55 {
  56   if (sizeof (int32_t) == sizeof (int))
  57     obstack_int_grow_fast (obstack, data);
  58   else
  59     obstack_grow (obstack, &data, sizeof (int32_t));
  60 }
  61
  62 /* Forward declaration.  */
  63 struct element_t;
  64
  65 /* Data type for the list of sections (one node per section, not a plain string list).  */
  66 struct section_list
  67 {
  68   /* Successor in the known_sections list.  */
  69   struct section_list *def_next;
  70   /* Successor in the sections list.  */
  71   struct section_list *next;
  72   /* Name of the section.  */
  73   const char *name;
  74   /* First element of this section.  */
  75   struct element_t *first;
  76   /* Last element of this section.  */
  77   struct element_t *last;
  78   /* Sorting rules of this section; read_directions stores an array with one entry per level here.  */
  79   enum coll_sort_rule *rules;
  80   /* Index of the rule set in the appropriate section of the output file.  */
  81   int ruleidx;
  82 };
  83
  84 struct element_t;  /* Forward declaration, needed for the pointer member below.  */
  85
  86 struct element_list_t  /* List of elements making up the weight of one level.  */
  87 {
  88   /* Number of entries in W.  */
  89   int cnt;
  90
  91   struct element_t **w;  /* CNT pointers; an entry may be NULL (IGNORE) or an ELEMENT_ELLIPSIS* marker.  */
  92 };
  93
  94 /* Data type for collating element.  */
  95 struct element_t
  96 {
  97   const char *name;  /* Symbolic name, or NULL if the element has none.  */
  98
  99   const char *mbs;  /* Multibyte sequence (NUL-terminated copy), or NULL.  */
 100   size_t nmbs;  /* Length of MBS in bytes, not counting the terminator.  */
 101   const uint32_t *wcs;  /* Wide character sequence (zero-terminated copy), or NULL.  */
 102   size_t nwcs;  /* Number of characters in WCS, not counting the terminator.  */
 103   int *mborder;  /* NULL in a freshly created element.  */
 104   int wcorder;  /* Zero in a freshly created element.  */
 105
 106   /* The following is a bit mask whose bits are set if this element is
 107      used in the appropriate level.  Interesting for the singlebyte
 108      weight computation.
 109
 110      XXX The type here restricts the number of levels to 32.  It could
 111      be changed if necessary but I doubt this is necessary.  */
 112   unsigned int used_in_level;
 113
 114   struct element_list_t *weights;  /* One list per sorting rule; NULL until insert_weights allocates the array.  */
 115
 116   /* Nonzero if this is a real character definition.  */
 117   int is_character;
 118
 119   /* Order of the character in the sequence.  This information will
 120      be used in range expressions.  */
 121   int mbseqorder;
 122   int wcseqorder;
 123
 124   /* Where does the definition come from (set by insert_weights).  */
 125   const char *file;
 126   size_t line;
 127
 128   /* Which section does this belong to.  */
 129   struct section_list *section;
 130
 131   /* Predecessor and successor in the order list.  */
 132   struct element_t *last;
 133   struct element_t *next;
 134
 135   /* Next and previous element in multibyte output list.  */
 136   struct element_t *mbnext;
 137   struct element_t *mblast;
 138
 139   /* Next and previous element in wide character output list.  */
 140   struct element_t *wcnext;
 141   struct element_t *wclast;
 142 };
 143
 144 /* Special element values: small integers cast to pointers which mark ellipsis placeholders in weight lists instead of pointing to real elements.  */
 145 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 146 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 147 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 148
 149 /* Data type for collating symbol.  */
 150 struct symbol_t
 151 {
 152   const char *name;  /* NUL-terminated copy of the symbol name.  */
 153
 154   /* Point to place in the order list; NULL until an element is created for the symbol.  */
 155   struct element_t *order;
 156
 157   /* Where does the definition come from.  */
 158   const char *file;
 159   size_t line;
 160 };
 161
 162 /* Sparse table of struct element_t *.  Each inclusion of 3level.h is parameterized by the TABLE, ELEMENT and DEFAULT macros defined right before it; ITERATE and NO_FINALIZE presumably select optional parts of the template -- see 3level.h.  */
 163 #define TABLE wchead_table
 164 #define ELEMENT struct element_t *
 165 #define DEFAULT NULL
 166 #define ITERATE
 167 #define NO_FINALIZE
 168 #include "3level.h"
 169
 170 /* Sparse table of int32_t; entries default to 0.  */
 171 #define TABLE collidx_table
 172 #define ELEMENT int32_t
 173 #define DEFAULT 0
 174 #include "3level.h"
 175
 176 /* Sparse table of uint32_t; entries default to all bits set.  */
 177 #define TABLE collseq_table
 178 #define ELEMENT uint32_t
 179 #define DEFAULT ~((uint32_t) 0)
 180 #include "3level.h"
 181
 182
 183 /* The real definition of the struct for the LC_COLLATE locale.  */
 184 struct locale_collate_t
 185 {
 186   int col_weight_max;
 187   int cur_weight_max;
 188
 189   /* List of known sections (scripts); see def_next in struct section_list.  */
 190   struct section_list *known_sections;
 191   /* List of used sections; see next in struct section_list.  */
 192   struct section_list *sections;
 193   /* Current section using definition.  */
 194   struct section_list *current_section;
 195   /* There always can be an unnamed section.  */
 196   struct section_list unnamed_section;
 197   /* To make handling of errors easier we have another section.  */
 198   struct section_list error_section;
 199   /* Sometimes we are defining the values for collating symbols before
 200      the first actual section.  */
 201   struct section_list symbol_section;
 202
 203   /* Start of the order list.  */
 204   struct element_t *start;
 205
 206   /* The undefined element.  */
 207   struct element_t undefined;
 208
 209   /* This is the cursor for `reorder_after' insertions; insert_weights links new elements right behind it.  */
 210   struct element_t *cursor;
 211
 212   /* This value is used when handling ellipsis.  */
 213   struct element_t ellipsis_weight;
 214
 215   /* Known collating elements.  */
 216   hash_table elem_table;
 217
 218   /* Known collating symbols.  */
 219   hash_table sym_table;
 220
 221   /* Known collation sequences.  */
 222   hash_table seq_table;
 223
 224   struct obstack mempool;  /* Pool from which elements, symbols and section nodes are allocated.  */
 225
 226   /* The LC_COLLATE category is a bit special as it is sometimes possible
 227      that the definitions from more than one input file contain information.
 228      Therefore we keep all relevant input in a list.  */
 229   struct locale_collate_t *next;
 230
 231   /* Arrays with heads of the list for each of the leading bytes in
 232      the multibyte sequences.  */
 233   struct element_t *mbheads[256];
 234
 235   /* Heads of the element lists for wide characters, kept in a sparse
 236      table; the wide character counterpart of MBHEADS.  */
 237   struct wchead_table wcheads;
 238
 239   /* The arrays with the collation sequence order.  */
 240   unsigned char mbseqorder[256];
 241   struct collseq_table wcseqorder;
 242 };
 243
 244
 245 /* We have a few global variables which are used for reading all
 246    LC_COLLATE category descriptions in all files.  */
 247 static uint32_t nrules;  /* Number of sorting rules (levels); zero until the first read_directions call determines it.  */
 248
 249
 250 /* We need UTF-8 encoding of numbers.  */
 251 static inline int
 252 utf8_encode (char *buf, int val)
 253 {
 254   int retval;
 255
 256   if (val < 0x80)
 257     {
 258       *buf++ = (char) val;
 259       retval = 1;
 260     }
 261   else
 262     {
 263       int step;
 264
 265       for (step = 2; step < 6; ++step)
 266         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 267           break;
 268       retval = step;
 269
 270       *buf = (unsigned char) (~0xff >> step);
 271       --step;
 272       do
 273         {
 274           buf[step] = 0x80 | (val & 0x3f);
 275           val >>= 6;
 276         }
 277       while (--step > 0);
 278       *buf |= val;
 279     }
 280
 281   return retval;
 282 }
 283
 284
 285 static struct section_list *
 286 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 287                    struct section_list *next)
 288 {
 289   struct section_list *newp;
 290
 291   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 292                                                 sizeof (*newp));
 293   newp->next = next;
 294   newp->name = string;
 295   newp->first = NULL;
 296   newp->last = NULL;
 297
 298   return newp;
 299 }
 300
 301
 302 static struct element_t *
 303 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 304              const uint32_t *wcs, const char *name, size_t namelen,
 305              int is_character)
 306 {
 307   struct element_t *newp;
 308
 309   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 310                                              sizeof (*newp));
 311   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 312                                                     name, namelen);
 313   if (mbs != NULL)
 314     {
 315       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 316       newp->nmbs = mbslen;
 317     }
 318   else
 319     {
 320       newp->mbs = NULL;
 321       newp->nmbs = 0;
 322     }
 323   if (wcs != NULL)
 324     {
 325       size_t nwcs = wcslen ((wchar_t *) wcs);
 326       uint32_t zero = 0;
 327       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 328       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 329       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 330       newp->nwcs = nwcs;
 331     }
 332   else
 333     {
 334       newp->wcs = NULL;
 335       newp->nwcs = 0;
 336     }
 337   newp->mborder = NULL;
 338   newp->wcorder = 0;
 339   newp->used_in_level = 0;
 340   newp->is_character = is_character;
 341
 342   /* Will be assigned later.  XXX  */
 343   newp->mbseqorder = 0;
 344   newp->wcseqorder = 0;
 345
 346   /* Will be allocated later.  */
 347   newp->weights = NULL;
 348
 349   newp->file = NULL;
 350   newp->line = 0;
 351
 352   newp->section = collate->current_section;
 353
 354   newp->last = NULL;
 355   newp->next = NULL;
 356
 357   newp->mbnext = NULL;
 358   newp->mblast = NULL;
 359
 360   newp->wcnext = NULL;
 361   newp->wclast = NULL;
 362
 363   return newp;
 364 }
 365
 366
 367 static struct symbol_t *
 368 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 369 {
 370   struct symbol_t *newp;
 371
 372   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 373
 374   newp->name = obstack_copy0 (&collate->mempool, name, len);
 375   newp->order = NULL;
 376
 377   newp->file = NULL;
 378   newp->line = 0;
 379
 380   return newp;
 381 }
 382
 383
 384 /* Test whether this name is already defined somewhere.  */
 385 static int
 386 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 387                  const struct charmap_t *charmap,
 388                  struct repertoire_t *repertoire, const char *symbol,
 389                  size_t symbol_len)
 390 {
 391   void *ignore = NULL;
 392
 393   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 394     {
 395       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 396                 (int) symbol_len, symbol);
 397       return 1;
 398     }
 399
 400   if (repertoire != NULL
 401       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 402           == 0))
 403     {
 404       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 405                 (int) symbol_len, symbol);
 406       return 1;
 407     }
 408
 409   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 410     {
 411       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 412                 (int) symbol_len, symbol);
 413       return 1;
 414     }
 415
 416   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 417     {
 418       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 419                 (int) symbol_len, symbol);
 420       return 1;
 421     }
 422
 423   return 0;
 424 }
 425
 426
 427 /* Read the direction specification.  */
 428 static void
 429 read_directions (struct linereader *ldfile, struct token *arg,
 430                  const struct charmap_t *charmap,
 431                  struct repertoire_t *repertoire, struct localedef_t *result)
 432 {
 433   int cnt = 0;
 434   int max = nrules ?: 10;
 435   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 436   int warned = 0;
 437   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 438
 439   while (1)
 440     {
 441       int valid = 0;
 442
 443       if (arg->tok == tok_forward)
 444         {
 445           if (rules[cnt] & sort_backward)
 446             {
 447               if (! warned)
 448                 {
 449                   lr_error (ldfile, _("\
 450 %s: `forward' and `backward' are mutually excluding each other"),
 451                             "LC_COLLATE");
 452                   warned = 1;
 453                 }
 454             }
 455           else if (rules[cnt] & sort_forward)
 456             {
 457               if (! warned)
 458                 {
 459                   lr_error (ldfile, _("\
 460 %s: `%s' mentioned more than once in definition of weight %d"),
 461                             "LC_COLLATE", "forward", cnt + 1);
 462                 }
 463             }
 464           else
 465             rules[cnt] |= sort_forward;
 466
 467           valid = 1;
 468         }
 469       else if (arg->tok == tok_backward)
 470         {
 471           if (rules[cnt] & sort_forward)
 472             {
 473               if (! warned)
 474                 {
 475                   lr_error (ldfile, _("\
 476 %s: `forward' and `backward' are mutually excluding each other"),
 477                             "LC_COLLATE");
 478                   warned = 1;
 479                 }
 480             }
 481           else if (rules[cnt] & sort_backward)
 482             {
 483               if (! warned)
 484                 {
 485                   lr_error (ldfile, _("\
 486 %s: `%s' mentioned more than once in definition of weight %d"),
 487                             "LC_COLLATE", "backward", cnt + 1);
 488                 }
 489             }
 490           else
 491             rules[cnt] |= sort_backward;
 492
 493           valid = 1;
 494         }
 495       else if (arg->tok == tok_position)
 496         {
 497           if (rules[cnt] & sort_position)
 498             {
 499               if (! warned)
 500                 {
 501                   lr_error (ldfile, _("\
 502 %s: `%s' mentioned more than once in definition of weight %d"),
 503                             "LC_COLLATE", "position", cnt + 1);
 504                 }
 505             }
 506           else
 507             rules[cnt] |= sort_position;
 508
 509           valid = 1;
 510         }
 511
 512       if (valid)
 513         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 514
 515       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 516           || arg->tok == tok_semicolon)
 517         {
 518           if (! valid && ! warned)
 519             {
 520               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 521               warned = 1;
 522             }
 523
 524           /* See whether we have to increment the counter.  */
 525           if (arg->tok != tok_comma && rules[cnt] != 0)
 526             {
 527               /* Add the default `forward' if we have seen only `position'.  */
 528               if (rules[cnt] == sort_position)
 529                 rules[cnt] = sort_position | sort_forward;
 530
 531               ++cnt;
 532             }
 533
 534           if (arg->tok == tok_eof || arg->tok == tok_eol)
 535             /* End of line or file, so we exit the loop.  */
 536             break;
 537
 538           if (nrules == 0)
 539             {
 540               /* See whether we have enough room in the array.  */
 541               if (cnt == max)
 542                 {
 543                   max += 10;
 544                   rules = (enum coll_sort_rule *) xrealloc (rules,
 545                                                             max
 546                                                             * sizeof (*rules));
 547                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 548                 }
 549             }
 550           else
 551             {
 552               if (cnt == nrules)
 553                 {
 554                   /* There must not be any more rule.  */
 555                   if (! warned)
 556                     {
 557                       lr_error (ldfile, _("\
 558 %s: too many rules; first entry only had %d"),
 559                                 "LC_COLLATE", nrules);
 560                       warned = 1;
 561                     }
 562
 563                   lr_ignore_rest (ldfile, 0);
 564                   break;
 565                 }
 566             }
 567         }
 568       else
 569         {
 570           if (! warned)
 571             {
 572               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 573               warned = 1;
 574             }
 575         }
 576
 577       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 578     }
 579
 580   if (nrules == 0)
 581     {
 582       /* Now we know how many rules we have.  */
 583       nrules = cnt;
 584       rules = (enum coll_sort_rule *) xrealloc (rules,
 585                                                 nrules * sizeof (*rules));
 586     }
 587   else
 588     {
 589       if (cnt < nrules)
 590         {
 591           /* Not enough rules in this specification.  */
 592           if (! warned)
 593             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 594
 595           do
 596             rules[cnt] = sort_forward;
 597           while (++cnt < nrules);
 598         }
 599     }
 600
 601   collate->current_section->rules = rules;
 602 }
 603
 604
 605 static struct element_t *
 606 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 607               const char *str, size_t len)
 608 {
 609   struct element_t *result = NULL;
 610
 611   /* Search for the entries among the collation sequences already define.  */
 612   if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
 613     {
 614       /* Nope, not define yet.  So we see whether it is a
 615          collation symbol.  */
 616       void *ptr;
 617
 618       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 619         {
 620           /* It's a collation symbol.  */
 621           struct symbol_t *sym = (struct symbol_t *) ptr;
 622           result = sym->order;
 623
 624           if (result == NULL)
 625             result = sym->order = new_element (collate, NULL, 0, NULL,
 626                                                NULL, 0, 0);
 627         }
 628       else if (find_entry (&collate->elem_table, str, len,
 629                            (void **) &result) != 0)
 630         {
 631           /* It's also no collation element.  So it is a character
 632              element defined later.  */
 633           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 634           /* Insert it into the sequence table.  */
 635           insert_entry (&collate->seq_table, str, len, result);
 636         }
 637     }
 638
 639   return result;
 640 }
 641
 642
 643 static void
 644 unlink_element (struct locale_collate_t *collate)
 645 {
 646   if (collate->cursor == collate->start)
 647     {
 648       assert (collate->cursor->next == NULL);
 649       assert (collate->cursor->last == NULL);
 650       collate->cursor = NULL;
 651     }
 652   else
 653     {
 654       if (collate->cursor->next != NULL)
 655         collate->cursor->next->last = collate->cursor->last;
 656       if (collate->cursor->last != NULL)
 657         collate->cursor->last->next = collate->cursor->next;
 658       collate->cursor = collate->cursor->last;
 659     }
 660 }
 661
 662
 663 static void
 664 insert_weights (struct linereader *ldfile, struct element_t *elem,
 665                 const struct charmap_t *charmap,
 666                 struct repertoire_t *repertoire, struct localedef_t *result,
 667                 enum token_t ellipsis)
 668 {
 669   int weight_cnt;
 670   struct token *arg;
 671   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 672
 673   /* Initialize all the fields.  */
 674   elem->file = ldfile->fname;
 675   elem->line = ldfile->lineno;
 676
 677   elem->last = collate->cursor;
 678   elem->next = collate->cursor ? collate->cursor->next : NULL;
 679   if (collate->cursor != NULL && collate->cursor->next != NULL)
 680     collate->cursor->next->last = elem;
 681   if (collate->cursor != NULL)
 682     collate->cursor->next = elem;
 683   if (collate->start == NULL)
 684     {
 685       assert (collate->cursor == NULL);
 686       collate->start = elem;
 687     }
 688
 689   elem->section = collate->current_section;
 690
 691   if (collate->current_section->first == NULL)
 692     collate->current_section->first = elem;
 693   if (collate->current_section->last == collate->cursor)
 694     collate->current_section->last = elem;
 695
 696   collate->cursor = elem;
 697
 698   elem->weights = (struct element_list_t *)
 699     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 700   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 701
 702   weight_cnt = 0;
 703
 704   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 705   do
 706     {
 707       if (arg->tok == tok_eof || arg->tok == tok_eol)
 708         break;
 709
 710       if (arg->tok == tok_ignore)
 711         {
 712           /* The weight for this level has to be ignored.  We use the
 713              null pointer to indicate this.  */
 714           elem->weights[weight_cnt].w = (struct element_t **)
 715             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 716           elem->weights[weight_cnt].w[0] = NULL;
 717           elem->weights[weight_cnt].cnt = 1;
 718         }
 719       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 720         {
 721           char ucs4str[10];
 722           struct element_t *val;
 723           char *symstr;
 724           size_t symlen;
 725
 726           if (arg->tok == tok_bsymbol)
 727             {
 728               symstr = arg->val.str.startmb;
 729               symlen = arg->val.str.lenmb;
 730             }
 731           else
 732             {
 733               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 734               symstr = ucs4str;
 735               symlen = 9;
 736             }
 737
 738           val = find_element (ldfile, collate, symstr, symlen);
 739           if (val == NULL)
 740             break;
 741
 742           elem->weights[weight_cnt].w = (struct element_t **)
 743             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 744           elem->weights[weight_cnt].w[0] = val;
 745           elem->weights[weight_cnt].cnt = 1;
 746         }
 747       else if (arg->tok == tok_string)
 748         {
 749           /* Split the string up in the individual characters and put
 750              the element definitions in the list.  */
 751           const char *cp = arg->val.str.startmb;
 752           int cnt = 0;
 753           struct element_t *charelem;
 754           struct element_t **weights = NULL;
 755           int max = 0;
 756
 757           if (*cp == '\0')
 758             {
 759               lr_error (ldfile, _("%s: empty weight string not allowed"),
 760                         "LC_COLLATE");
 761               lr_ignore_rest (ldfile, 0);
 762               break;
 763             }
 764
 765           do
 766             {
 767               if (*cp == '<')
 768                 {
 769                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 770                      the latter we have to unify the name.  */
 771                   const char *startp = ++cp;
 772                   size_t len;
 773
 774                   while (*cp != '>')
 775                     {
 776                       if (*cp == ldfile->escape_char)
 777                         ++cp;
 778                       if (*cp == '\0')
 779                         /* It's a syntax error.  */
 780                         goto syntax;
 781
 782                       ++cp;
 783                     }
 784
 785                   if (cp - startp == 5 && startp[0] == 'U'
 786                       && isxdigit (startp[1]) && isxdigit (startp[2])
 787                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 788                     {
 789                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 790                       char *newstr;
 791
 792                       newstr = (char *) xmalloc (10);
 793                       snprintf (newstr, 10, "U%08X", ucs4);
 794                       startp = newstr;
 795
 796                       len = 9;
 797                     }
 798                   else
 799                     len = cp - startp;
 800
 801                   charelem = find_element (ldfile, collate, startp, len);
 802                   ++cp;
 803                 }
 804               else
 805                 {
 806                   /* People really shouldn't use characters directly in
 807                      the string.  Especially since it's not really clear
 808                      what this means.  We interpret all characters in the
 809                      string as if that would be bsymbols.  Otherwise we
 810                      would have to match back to bsymbols somehow and this
 811                      is normally not what people normally expect.  */
 812                   charelem = find_element (ldfile, collate, cp++, 1);
 813                 }
 814
 815               if (charelem == NULL)
 816                 {
 817                   /* We ignore the rest of the line.  */
 818                   lr_ignore_rest (ldfile, 0);
 819                   break;
 820                 }
 821
 822               /* Add the pointer.  */
 823               if (cnt >= max)
 824                 {
 825                   struct element_t **newp;
 826                   max += 10;
 827                   newp = (struct element_t **)
 828                     alloca (max * sizeof (struct element_t *));
 829                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 830                   weights = newp;
 831                 }
 832               weights[cnt++] = charelem;
 833             }
 834           while (*cp != '\0');
 835
 836           /* Now store the information.  */
 837           elem->weights[weight_cnt].w = (struct element_t **)
 838             obstack_alloc (&collate->mempool,
 839                            cnt * sizeof (struct element_t *));
 840           memcpy (elem->weights[weight_cnt].w, weights,
 841                   cnt * sizeof (struct element_t *));
 842           elem->weights[weight_cnt].cnt = cnt;
 843
 844           /* We don't need the string anymore.  */
 845           free (arg->val.str.startmb);
 846         }
 847       else if (ellipsis != tok_none
 848                && (arg->tok == tok_ellipsis2
 849                    || arg->tok == tok_ellipsis3
 850                    || arg->tok == tok_ellipsis4))
 851         {
 852           /* It must be the same ellipsis as used in the initial column.  */
 853           if (arg->tok != ellipsis)
 854             lr_error (ldfile, _("\
 855 %s: weights must use the same ellipsis symbol as the name"),
 856                       "LC_COLLATE");
 857
 858           /* The weight for this level will depend on the element
 859              iterating over the range.  Put a placeholder.  */
 860           elem->weights[weight_cnt].w = (struct element_t **)
 861             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 862           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 863           elem->weights[weight_cnt].cnt = 1;
 864         }
 865       else
 866         {
 867         syntax:
 868           /* It's a syntax error.  */
 869           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 870           lr_ignore_rest (ldfile, 0);
 871           break;
 872         }
 873
 874       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 875       /* This better should be the end of the line or a semicolon.  */
 876       if (arg->tok == tok_semicolon)
 877         /* OK, ignore this and read the next token.  */
 878         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 879       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 880         {
 881           /* It's a syntax error.  */
 882           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 883           lr_ignore_rest (ldfile, 0);
 884           break;
 885         }
 886     }
 887   while (++weight_cnt < nrules);
 888
 889   if (weight_cnt < nrules)
 890     {
 891       /* This means the rest of the line uses the current element as
 892          the weight.  */
 893       do
 894         {
 895           elem->weights[weight_cnt].w = (struct element_t **)
 896             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 897           if (ellipsis == tok_none)
 898             elem->weights[weight_cnt].w[0] = elem;
 899           else
 900             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 901           elem->weights[weight_cnt].cnt = 1;
 902         }
 903       while (++weight_cnt < nrules);
 904     }
 905   else
 906     {
 907       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 908         {
 909           /* Too many rule values.  */
 910           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 911           lr_ignore_rest (ldfile, 0);
 912         }
 913       else
 914         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 915     }
 916 }
 917
 918
 919 static int
 920 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 921               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 922               struct localedef_t *result)
 923 {
 924   /* First find out what kind of symbol this is.  */
 925   struct charseq *seq;
 926   uint32_t wc;
 927   struct element_t *elem = NULL;
 928   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 929
 930   /* Try to find the character in the charmap.  */
 931   seq = charmap_find_value (charmap, symstr, symlen);
 932
 933   /* Determine the wide character.  */
 934   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 935     {
 936       wc = repertoire_find_value (repertoire, symstr, symlen);
 937       if (seq != NULL)
 938         seq->ucs4 = wc;
 939     }
 940   else
 941     wc = seq->ucs4;
 942
 943   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 944     {
 945       /* It's no character, so look through the collation elements and
 946          symbol list.  */
 947       if (find_entry (&collate->elem_table, symstr, symlen,
 948                            (void **) &elem) != 0)
 949         {
 950           void *result;
 951           struct symbol_t *sym = NULL;
 952
 953           /* It's also collation element.  Therefore it's either a
 954              collating symbol or it's a character which is not
 955              supported by the character set.  In the later case we
 956              simply create a dummy entry.  */
 957           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 958             {
 959               /* It's a collation symbol.  */
 960               sym = (struct symbol_t *) result;
 961
 962               elem = sym->order;
 963             }
 964
 965           if (elem == NULL)
 966             {
 967               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 968
 969               if (sym != NULL)
 970                 sym->order = elem;
 971               else
 972                 /* Enter a fake element in the sequence table.  This
 973                    won't cause anything in the output since there is
 974                    no multibyte or wide character associated with
 975                    it.  */
 976                 insert_entry (&collate->seq_table, symstr, symlen, elem);
 977             }
 978         }
 979     }
 980   else
 981     {
 982       /* Otherwise the symbols stands for a character.  */
 983       if (find_entry (&collate->seq_table, symstr, symlen,
 984                       (void **) &elem) != 0)
 985         {
 986           uint32_t wcs[2] = { wc, 0 };
 987
 988           /* We have to allocate an entry.  */
 989           elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
 990                               seq != NULL ? seq->nbytes : 0,
 991                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
 992                               symstr, symlen, 1);
 993
 994           /* And add it to the table.  */
 995           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
 996             /* This cannot happen.  */
 997             assert (! "Internal error");
 998         }
 999       else
1000         {
1001           /* Maybe the character was used before the definition.  In this case
1002              we have to insert the byte sequences now.  */
1003           if (elem->mbs == NULL && seq != NULL)
1004             {
1005               elem->mbs = obstack_copy0 (&collate->mempool,
1006                                          seq->bytes, seq->nbytes);
1007               elem->nmbs = seq->nbytes;
1008             }
1009
1010           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1011             {
1012               uint32_t wcs[2] = { wc, 0 };
1013
1014               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1015               elem->nwcs = 1;
1016             }
1017         }
1018     }
1019
1020   /* Test whether this element is not already in the list.  */
1021   if (elem->next != NULL || elem == collate->cursor)
1022     {
1023       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1024                 (int) symlen, symstr, elem->file, elem->line);
1025       lr_ignore_rest (ldfile, 0);
1026       return 1;
1027     }
1028
1029   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1030
1031   return 0;
1032 }
1033
1034
1035 static void
1036 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1037                  enum token_t ellipsis, const struct charmap_t *charmap,
1038                  struct repertoire_t *repertoire,
1039                  struct localedef_t *result)
1040 {
1041   struct element_t *startp;
1042   struct element_t *endp;
1043   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1044
1045   /* Unlink the entry added for the ellipsis.  */
1046   unlink_element (collate);
1047   startp = collate->cursor;
1048
1049   /* Process and add the end-entry.  */
1050   if (symstr != NULL
1051       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1052     /* Something went wrong with inserting the to-value.  This means
1053        we cannot process the ellipsis.  */
1054     return;
1055
1056   /* Reset the cursor.  */
1057   collate->cursor = startp;
1058
1059   /* Now we have to handle many different situations:
1060      - we have to distinguish between the three different ellipsis forms
1061      - the is the ellipsis at the beginning, in the middle, or at the end.
1062   */
1063   endp = collate->cursor->next;
1064   assert (symstr == NULL || endp != NULL);
1065
1066   /* XXX The following is probably very wrong since also collating symbols
1067      can appear in ranges.  But do we want/can refine the test for that?  */
1068 #if 0
1069   /* Both, the start and the end symbol, must stand for characters.  */
1070   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1071       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1072     {
1073       lr_error (ldfile, _("\
1074 %s: the start and the end symbol of a range must stand for characters"),
1075                 "LC_COLLATE");
1076       return;
1077     }
1078 #endif
1079
1080   if (ellipsis == tok_ellipsis3)
1081     {
1082       /* One requirement we make here: the length of the byte
1083          sequences for the first and end character must be the same.
1084          This is mainly to prevent unwanted effects and this is often
1085          not what is wanted.  */
1086       size_t len = (startp->mbs != NULL ? startp->nmbs
1087                     : (endp->mbs != NULL ? endp->nmbs : 0));
1088       char mbcnt[len + 1];
1089       char mbend[len + 1];
1090
1091       /* Well, this should be caught somewhere else already.  Just to
1092          make sure.  */
1093       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1094       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1095
1096       if (startp != NULL && endp != NULL
1097           && startp->mbs != NULL && endp->mbs != NULL
1098           && startp->nmbs != endp->nmbs)
1099         {
1100           lr_error (ldfile, _("\
1101 %s: byte sequences of first and last character must have the same length"),
1102                     "LC_COLLATE");
1103           return;
1104         }
1105
1106       /* Determine whether we have to generate multibyte sequences.  */
1107       if ((startp == NULL || startp->mbs != NULL)
1108           && (endp == NULL || endp->mbs != NULL))
1109         {
1110           int cnt;
1111           int ret;
1112
1113           /* Prepare the beginning byte sequence.  This is either from the
1114              beginning byte sequence or it is all nulls if it was an
1115              initial ellipsis.  */
1116           if (startp == NULL || startp->mbs == NULL)
1117             memset (mbcnt, '\0', len);
1118           else
1119             {
1120               memcpy (mbcnt, startp->mbs, len);
1121
1122               /* And increment it so that the value is the first one we will
1123                  try to insert.  */
1124               for (cnt = len - 1; cnt >= 0; --cnt)
1125                 if (++mbcnt[cnt] != '\0')
1126                   break;
1127             }
1128           mbcnt[len] = '\0';
1129
1130           /* And the end sequence.  */
1131           if (endp == NULL || endp->mbs == NULL)
1132             memset (mbend, '\0', len);
1133           else
1134             memcpy (mbend, endp->mbs, len);
1135           mbend[len] = '\0';
1136
1137           /* Test whether we have a correct range.  */
1138           ret = memcmp (mbcnt, mbend, len);
1139           if (ret >= 0)
1140             {
1141               if (ret > 0)
1142                 lr_error (ldfile, _("%s: byte sequence of first character of \
1143 sequence is not lower than that of the last character"), "LC_COLLATE");
1144               return;
1145             }
1146
1147           /* Generate the byte sequences data.  */
1148           while (1)
1149             {
1150               struct charseq *seq;
1151
1152               /* Quite a bit of work ahead.  We have to find the character
1153                  definition for the byte sequence and then determine the
1154                  wide character belonging to it.  */
1155               seq = charmap_find_symbol (charmap, mbcnt, len);
1156               if (seq != NULL)
1157                 {
1158                   struct element_t *elem;
1159                   size_t namelen;
1160
1161                   /* I don't this this can ever happen.  */
1162                   assert (seq->name != NULL);
1163                   namelen = strlen (seq->name);
1164
1165                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1166                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1167                                                        namelen);
1168
1169                   /* Now we are ready to insert the new value in the
1170                      sequence.  Find out whether the element is
1171                      already known.  */
1172                   if (find_entry (&collate->seq_table, seq->name, namelen,
1173                                   (void **) &elem) != 0)
1174                     {
1175                       uint32_t wcs[2] = { seq->ucs4, 0 };
1176
1177                       /* We have to allocate an entry.  */
1178                       elem = new_element (collate, mbcnt, len,
1179                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1180                                           ? NULL : wcs, seq->name,
1181                                           namelen, 1);
1182
1183                       /* And add it to the table.  */
1184                       if (insert_entry (&collate->seq_table, seq->name,
1185                                         namelen, elem) != 0)
1186                         /* This cannot happen.  */
1187                         assert (! "Internal error");
1188                     }
1189
1190                   /* Test whether this element is not already in the list.  */
1191                   if (elem->next != NULL || (collate->cursor != NULL
1192                                              && elem->next == collate->cursor))
1193                     {
1194                       lr_error (ldfile, _("\
1195 order for `%.*s' already defined at %s:%Zu"),
1196                                 (int) namelen, seq->name,
1197                                 elem->file, elem->line);
1198                       goto increment;
1199                     }
1200
1201                   /* Enqueue the new element.  */
1202                   elem->last = collate->cursor;
1203                   if (collate->cursor == NULL)
1204                     elem->next = NULL;
1205                   else
1206                     {
1207                       elem->next = collate->cursor->next;
1208                       elem->last->next = elem;
1209                       if (elem->next != NULL)
1210                         elem->next->last = elem;
1211                     }
1212                   if (collate->start == NULL)
1213                     {
1214                       assert (collate->cursor == NULL);
1215                       collate->start = elem;
1216                     }
1217                   collate->cursor = elem;
1218
1219                  /* Add the weight value.  We take them from the
1220                     `ellipsis_weights' member of `collate'.  */
1221                   elem->weights = (struct element_list_t *)
1222                     obstack_alloc (&collate->mempool,
1223                                    nrules * sizeof (struct element_list_t));
1224                   for (cnt = 0; cnt < nrules; ++cnt)
1225                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1226                         && (collate->ellipsis_weight.weights[cnt].w[0]
1227                             == ELEMENT_ELLIPSIS2))
1228                       {
1229                         elem->weights[cnt].w = (struct element_t **)
1230                           obstack_alloc (&collate->mempool,
1231                                          sizeof (struct element_t *));
1232                         elem->weights[cnt].w[0] = elem;
1233                         elem->weights[cnt].cnt = 1;
1234                       }
1235                     else
1236                       {
1237                         /* Simply use the weight from `ellipsis_weight'.  */
1238                         elem->weights[cnt].w =
1239                           collate->ellipsis_weight.weights[cnt].w;
1240                         elem->weights[cnt].cnt =
1241                           collate->ellipsis_weight.weights[cnt].cnt;
1242                       }
1243                 }
1244
1245               /* Increment for the next round.  */
1246             increment:
1247               for (cnt = len - 1; cnt >= 0; --cnt)
1248                 if (++mbcnt[cnt] != '\0')
1249                   break;
1250
1251               /* Find out whether this was all.  */
1252               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1253                 /* Yep, that's all.  */
1254                 break;
1255             }
1256         }
1257     }
1258   else
1259     {
1260       /* For symbolic range we naturally must have a beginning and an
1261          end specified by the user.  */
1262       if (startp == NULL)
1263         lr_error (ldfile, _("\
1264 %s: symbolic range ellipsis must not directly follow `order_start'"),
1265                   "LC_COLLATE");
1266       else if (endp == NULL)
1267         lr_error (ldfile, _("\
1268 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1269                   "LC_COLLATE");
1270       else
1271         {
1272           /* Determine the range.  To do so we have to determine the
1273              common prefix of the both names and then the numeric
1274              values of both ends.  */
1275           size_t lenfrom = strlen (startp->name);
1276           size_t lento = strlen (endp->name);
1277           char buf[lento + 1];
1278           int preflen = 0;
1279           long int from;
1280           long int to;
1281           char *cp;
1282           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1283
1284           if (lenfrom != lento)
1285             {
1286             invalid_range:
1287               lr_error (ldfile, _("\
1288 `%s' and `%.*s' are no valid names for symbolic range"),
1289                         startp->name, (int) lento, endp->name);
1290               return;
1291             }
1292
1293           while (startp->name[preflen] == endp->name[preflen])
1294             if (startp->name[preflen] == '\0')
1295               /* Nothing to be done.  The start and end point are identical
1296                  and while inserting the end point we have already given
1297                  the user an error message.  */
1298               return;
1299             else
1300               ++preflen;
1301
1302           errno = 0;
1303           from = strtol (startp->name + preflen, &cp, base);
1304           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1305             goto invalid_range;
1306
1307           errno = 0;
1308           to = strtol (endp->name + preflen, &cp, base);
1309           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1310             goto invalid_range;
1311
1312           /* Copy the prefix.  */
1313           memcpy (buf, startp->name, preflen);
1314
1315           /* Loop over all values.  */
1316           for (++from; from < to; ++from)
1317             {
1318               struct element_t *elem = NULL;
1319               struct charseq *seq;
1320               uint32_t wc;
1321               int cnt;
1322
1323               /* Generate the the name.  */
1324               sprintf (buf + preflen, base == 10 ? "%ld" : "%lX", from);
1325
1326               /* Look whether this name is already defined.  */
1327               if (find_entry (&collate->seq_table, buf, symlen,
1328                               (void **) &elem) == 0)
1329                 {
1330                   if (elem->next != NULL || (collate->cursor != NULL
1331                                              && elem->next == collate->cursor))
1332                     {
1333                       lr_error (ldfile, _("\
1334 %s: order for `%.*s' already defined at %s:%Zu"),
1335                                 "LC_COLLATE", (int) lenfrom, buf,
1336                                 elem->file, elem->line);
1337                       continue;
1338                     }
1339
1340                   if (elem->name == NULL)
1341                     {
1342                       lr_error (ldfile, _("%s: `%s' must be a character"),
1343                                 "LC_COLLATE", buf);
1344                       continue;
1345                     }
1346                 }
1347
1348               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1349                 {
1350                   /* Search for a character of this name.  */
1351                   seq = charmap_find_value (charmap, buf, lenfrom);
1352                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1353                     {
1354                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1355
1356                       if (seq != NULL)
1357                         seq->ucs4 = wc;
1358                     }
1359                   else
1360                     wc = seq->ucs4;
1361
1362                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1363                     /* We don't know anything about a character with this
1364                        name.  XXX Should we warn?  */
1365                     continue;
1366
1367                   if (elem == NULL)
1368                     {
1369                       uint32_t wcs[2] = { wc, 0 };
1370
1371                       /* We have to allocate an entry.  */
1372                       elem = new_element (collate,
1373                                           seq != NULL ? seq->bytes : NULL,
1374                                           seq != NULL ? seq->nbytes : 0,
1375                                           wc == ILLEGAL_CHAR_VALUE
1376                                           ? NULL : wcs, buf, lenfrom, 1);
1377                     }
1378                   else
1379                     {
1380                       /* Update the element.  */
1381                       if (seq != NULL)
1382                         {
1383                           elem->mbs = obstack_copy0 (&collate->mempool,
1384                                                      seq->bytes, seq->nbytes);
1385                           elem->nmbs = seq->nbytes;
1386                         }
1387
1388                       if (wc != ILLEGAL_CHAR_VALUE)
1389                         {
1390                           uint32_t zero = 0;
1391
1392                           obstack_grow (&collate->mempool,
1393                                         &wc, sizeof (uint32_t));
1394                           obstack_grow (&collate->mempool,
1395                                         &zero, sizeof (uint32_t));
1396                           elem->wcs = obstack_finish (&collate->mempool);
1397                           elem->nwcs = 1;
1398                         }
1399                     }
1400
1401                   elem->file = ldfile->fname;
1402                   elem->line = ldfile->lineno;
1403                   elem->section = collate->current_section;
1404                 }
1405
1406               /* Enqueue the new element.  */
1407               elem->last = collate->cursor;
1408               elem->next = collate->cursor->next;
1409               elem->last->next = elem;
1410               if (elem->next != NULL)
1411                 elem->next->last = elem;
1412               collate->cursor = elem;
1413
1414               /* Now add the weights.  They come from the `ellipsis_weights'
1415                  member of `collate'.  */
1416               elem->weights = (struct element_list_t *)
1417                 obstack_alloc (&collate->mempool,
1418                                nrules * sizeof (struct element_list_t));
1419               for (cnt = 0; cnt < nrules; ++cnt)
1420                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1421                     && (collate->ellipsis_weight.weights[cnt].w[0]
1422                         == ELEMENT_ELLIPSIS2))
1423                   {
1424                     elem->weights[cnt].w = (struct element_t **)
1425                       obstack_alloc (&collate->mempool,
1426                                      sizeof (struct element_t *));
1427                     elem->weights[cnt].w[0] = elem;
1428                     elem->weights[cnt].cnt = 1;
1429                   }
1430                 else
1431                   {
1432                     /* Simly use the weight from `ellipsis_weight'.  */
1433                     elem->weights[cnt].w =
1434                       collate->ellipsis_weight.weights[cnt].w;
1435                     elem->weights[cnt].cnt =
1436                       collate->ellipsis_weight.weights[cnt].cnt;
1437                   }
1438             }
1439         }
1440     }
1441 }
1442
1443
1444 static void
1445 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1446                  struct localedef_t *copy_locale, int ignore_content)
1447 {
1448   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1449     {
1450       struct locale_collate_t *collate;
1451
1452       if (copy_locale == NULL)
1453         {
1454           collate = locale->categories[LC_COLLATE].collate =
1455             (struct locale_collate_t *)
1456             xcalloc (1, sizeof (struct locale_collate_t));
1457
1458           /* Init the various data structures.  */
1459           init_hash (&collate->elem_table, 100);
1460           init_hash (&collate->sym_table, 100);
1461           init_hash (&collate->seq_table, 500);
1462           obstack_init (&collate->mempool);
1463
1464           collate->col_weight_max = -1;
1465         }
1466       else
1467         /* Reuse the copy_locale's data structures.  */
1468         collate = locale->categories[LC_COLLATE].collate =
1469           copy_locale->categories[LC_COLLATE].collate;
1470     }
1471
1472   ldfile->translate_strings = 0;
1473   ldfile->return_widestr = 0;
1474 }
1475
1476
1477 void
1478 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1479 {
1480   /* Now is the time when we can assign the individual collation
1481      values for all the symbols.  We have possibly different values
1482      for the wide- and the multibyte-character symbols.  This is done
1483      since it might make a difference in the encoding if there is in
1484      some cases no multibyte-character but there are wide-characters.
1485      (The other way around it is not important since theencoded
1486      collation value in the wide-character case is 32 bits wide and
1487      therefore requires no encoding).
1488
1489      The lowest collation value assigned is 2.  Zero is reserved for
1490      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1491      functions and 1 is used to separate the individual passes for the
1492      different rules.
1493
1494      We also have to construct is list with all the bytes/words which
1495      can come first in a sequence, followed by all the elements which
1496      also start with this byte/word.  The order is reverse which has
1497      among others the important effect that longer strings are located
1498      first in the list.  This is required for the output data since
1499      the algorithm used in `strcoll' etc depends on this.
1500
1501      The multibyte case is easy.  We simply sort into an array with
1502      256 elements.  */
1503   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1504   int mbact[nrules];
1505   int wcact;
1506   int mbseqact;
1507   int wcseqact;
1508   struct element_t *runp;
1509   int i;
1510   int need_undefined = 0;
1511   struct section_list *sect;
1512   int ruleidx;
1513   int nr_wide_elems = 0;
1514
1515   if (collate == NULL)
1516     {
1517       /* No data, no check.  */
1518       if (! be_quiet)
1519         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1520                                 "LC_COLLATE"));
1521       return;
1522     }
1523
1524   /* If this assertion is hit change the type in `element_t'.  */
1525   assert (nrules <= sizeof (runp->used_in_level) * 8);
1526
1527   /* Make sure that the `position' rule is used either in all sections
1528      or in none.  */
1529   for (i = 0; i < nrules; ++i)
1530     for (sect = collate->sections; sect != NULL; sect = sect->next)
1531       if (sect->rules != NULL
1532           && ((sect->rules[i] & sort_position)
1533               != (collate->sections->rules[i] & sort_position)))
1534         {
1535           WITH_CUR_LOCALE (error (0, 0, _("\
1536 %s: `position' must be used for a specific level in all sections or none"),
1537                                   "LC_COLLATE"));
1538           break;
1539         }
1540
1541   /* Find out which elements are used at which level.  At the same
1542      time we find out whether we have any undefined symbols.  */
1543   runp = collate->start;
1544   while (runp != NULL)
1545     {
1546       if (runp->mbs != NULL)
1547         {
1548           for (i = 0; i < nrules; ++i)
1549             {
1550               int j;
1551
1552               for (j = 0; j < runp->weights[i].cnt; ++j)
1553                 /* A NULL pointer as the weight means IGNORE.  */
1554                 if (runp->weights[i].w[j] != NULL)
1555                   {
1556                     if (runp->weights[i].w[j]->weights == NULL)
1557                       {
1558                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1559                                                         runp->line,
1560                                                         _("symbol `%s' not defined"),
1561                                                         runp->weights[i].w[j]->name));
1562
1563                         need_undefined = 1;
1564                         runp->weights[i].w[j] = &collate->undefined;
1565                       }
1566                     else
1567                       /* Set the bit for the level.  */
1568                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1569                   }
1570             }
1571         }
1572
1573       /* Up to the next entry.  */
1574       runp = runp->next;
1575     }
1576
1577   /* Walk through the list of defined sequences and assign weights.  Also
1578      create the data structure which will allow generating the single byte
1579      character based tables.
1580
1581      Since at each time only the weights for each of the rules are
1582      only compared to other weights for this rule it is possible to
1583      assign more compact weight values than simply counting all
1584      weights in sequence.  We can assign weights from 3, one for each
1585      rule individually and only for those elements, which are actually
1586      used for this rule.
1587
1588      Why is this important?  It is not for the wide char table.  But
1589      it is for the singlebyte output since here larger numbers have to
1590      be encoded to make it possible to emit the value as a byte
1591      string.  */
1592   for (i = 0; i < nrules; ++i)
1593     mbact[i] = 2;
1594   wcact = 2;
1595   mbseqact = 0;
1596   wcseqact = 0;
1597   runp = collate->start;
1598   while (runp != NULL)
1599     {
1600       /* Determine the order.  */
1601       if (runp->used_in_level != 0)
1602         {
1603           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1604                                                  nrules * sizeof (int));
1605
1606           for (i = 0; i < nrules; ++i)
1607             if ((runp->used_in_level & (1 << i)) != 0)
1608               runp->mborder[i] = mbact[i]++;
1609             else
1610               runp->mborder[i] = 0;
1611         }
1612
1613       if (runp->mbs != NULL)
1614         {
1615           struct element_t **eptr;
1616           struct element_t *lastp = NULL;
1617
1618           /* Find the point where to insert in the list.  */
1619           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1620           while (*eptr != NULL)
1621             {
1622               if ((*eptr)->nmbs < runp->nmbs)
1623                 break;
1624
1625               if ((*eptr)->nmbs == runp->nmbs)
1626                 {
1627                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1628
1629                   if (c == 0)
1630                     {
1631                       /* This should not happen.  It means that we have
1632                          to symbols with the same byte sequence.  It is
1633                          of course an error.  */
1634                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1635                                                       (*eptr)->line,
1636                                                       _("\
1637 symbol `%s' has the same encoding as"), (*eptr)->name);
1638                                        error_at_line (0, 0, runp->file,
1639                                                       runp->line,
1640                                                       _("symbol `%s'"),
1641                                                       runp->name));
1642                       goto dont_insert;
1643                     }
1644                   else if (c < 0)
1645                     /* Insert it here.  */
1646                     break;
1647                 }
1648
1649               /* To the next entry.  */
1650               lastp = *eptr;
1651               eptr = &(*eptr)->mbnext;
1652             }
1653
1654           /* Set the pointers.  */
1655           runp->mbnext = *eptr;
1656           runp->mblast = lastp;
1657           if (*eptr != NULL)
1658             (*eptr)->mblast = runp;
1659           *eptr = runp;
1660         dont_insert:
1661           ;
1662         }
1663
1664       if (runp->used_in_level)
1665         {
1666           runp->wcorder = wcact++;
1667
1668           /* We take the opportunity to count the elements which have
1669              wide characters.  */
1670           ++nr_wide_elems;
1671         }
1672
1673       if (runp->is_character)
1674         {
1675           if (runp->nmbs == 1)
1676             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1677
1678           runp->wcseqorder = wcseqact++;
1679         }
1680       else if (runp->mbs != NULL && runp->weights != NULL)
1681         /* This is for collation elements.  */
1682         runp->wcseqorder = wcseqact++;
1683
1684       /* Up to the next entry.  */
1685       runp = runp->next;
1686     }
1687
1688   /* Find out whether any of the `mbheads' entries is unset.  In this
1689      case we use the UNDEFINED entry.  */
1690   for (i = 1; i < 256; ++i)
1691     if (collate->mbheads[i] == NULL)
1692       {
1693         need_undefined = 1;
1694         collate->mbheads[i] = &collate->undefined;
1695       }
1696
1697   /* Now to the wide character case.  */
1698   collate->wcheads.p = 6;
1699   collate->wcheads.q = 10;
1700   wchead_table_init (&collate->wcheads);
1701
1702   collate->wcseqorder.p = 6;
1703   collate->wcseqorder.q = 10;
1704   collseq_table_init (&collate->wcseqorder);
1705
1706   /* Start adding.  */
1707   runp = collate->start;
1708   while (runp != NULL)
1709     {
1710       if (runp->wcs != NULL)
1711         {
1712           struct element_t *e;
1713           struct element_t **eptr;
1714           struct element_t *lastp;
1715
1716           /* Insert the collation sequence value.  */
1717           if (runp->is_character)
1718             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1719                                runp->wcseqorder);
1720
1721           /* Find the point where to insert in the list.  */
1722           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1723           eptr = &e;
1724           lastp = NULL;
1725           while (*eptr != NULL)
1726             {
1727               if ((*eptr)->nwcs < runp->nwcs)
1728                 break;
1729
1730               if ((*eptr)->nwcs == runp->nwcs)
1731                 {
1732                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1733                                    (wchar_t *) runp->wcs, runp->nwcs);
1734
1735                   if (c == 0)
1736                     {
1737                       /* This should not happen.  It means that we have
1738                          two symbols with the same byte sequence.  It is
1739                          of course an error.  */
1740                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1741                                                       (*eptr)->line,
1742                                                       _("\
1743 symbol `%s' has the same encoding as"), (*eptr)->name);
1744                                        error_at_line (0, 0, runp->file,
1745                                                       runp->line,
1746                                                       _("symbol `%s'"),
1747                                                       runp->name));
1748                       goto dont_insertwc;
1749                     }
1750                   else if (c < 0)
1751                     /* Insert it here.  */
1752                     break;
1753                 }
1754
1755               /* To the next entry.  */
1756               lastp = *eptr;
1757               eptr = &(*eptr)->wcnext;
1758             }
1759
1760           /* Set the pointers.  */
1761           runp->wcnext = *eptr;
1762           runp->wclast = lastp;
1763           if (*eptr != NULL)
1764             (*eptr)->wclast = runp;
1765           *eptr = runp;
1766           if (eptr == &e)
1767             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1768         dont_insertwc:
1769           ;
1770         }
1771
1772       /* Up to the next entry.  */
1773       runp = runp->next;
1774     }
1775
1776   collseq_table_finalize (&collate->wcseqorder);
1777
1778   /* Now determine whether the UNDEFINED entry is needed and if yes,
1779      whether it was defined.  */
1780   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1781   if (collate->undefined.file == NULL)
1782     {
1783       if (need_undefined)
1784         {
1785           /* This seems not to be enforced by recent standards.  Don't
1786              emit an error, simply append UNDEFINED at the end.  */
1787           if (0)
1788             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1789
1790           /* Add UNDEFINED at the end.  */
1791           collate->undefined.mborder =
1792             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1793
1794           for (i = 0; i < nrules; ++i)
1795             collate->undefined.mborder[i] = mbact[i]++;
1796         }
1797
1798       /* In any case we will need the definition for the wide character
1799          case.  But we will not complain that it is missing since the
1800          specification strangely enough does not seem to account for
1801          this.  */
1802       collate->undefined.wcorder = wcact++;
1803     }
1804
1805   /* Finally, try to unify the rules for the sections.  Whenever the rules
1806      for a section are the same as those for another section give the
1807      ruleset the same index.  Since there are never many sections we can
1808      use an O(n^2) algorithm here.  */
1809   sect = collate->sections;
1810   while (sect != NULL && sect->rules == NULL)
1811     sect = sect->next;
1812   assert (sect != NULL);
1813   ruleidx = 0;
1814   do
1815     {
1816       struct section_list *osect = collate->sections;
1817
1818       while (osect != sect)
1819         if (osect->rules != NULL
1820             && memcmp (osect->rules, sect->rules, nrules) == 0)
1821           break;
1822         else
1823           osect = osect->next;
1824
1825       if (osect == sect)
1826         sect->ruleidx = ruleidx++;
1827       else
1828         sect->ruleidx = osect->ruleidx;
1829
1830       /* Next section.  */
1831       do
1832         sect = sect->next;
1833       while (sect != NULL && sect->rules == NULL);
1834     }
1835   while (sect != NULL);
1836   /* We are currently not prepared for more than 128 rulesets.  But this
1837      should never really be a problem.  */
1838   assert (ruleidx <= 128);
1839 }
1840
1841
1842 static int32_t
1843 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1844                struct element_t *elem)
1845 {
1846   size_t cnt;
1847   int32_t retval;
1848
1849   /* Optimize the use of UNDEFINED.  */
1850   if (elem == &collate->undefined)
1851     /* The weights are already inserted.  */
1852     return 0;
1853
1854   /* This byte can start exactly one collation element and this is
1855      a single byte.  We can directly give the index to the weights.  */
1856   retval = obstack_object_size (pool);
1857
1858   /* Construct the weight.  */
1859   for (cnt = 0; cnt < nrules; ++cnt)
1860     {
1861       char buf[elem->weights[cnt].cnt * 7];
1862       int len = 0;
1863       int i;
1864
1865       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1866         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1867         if (elem->weights[cnt].w[i] != NULL)
1868           len += utf8_encode (&buf[len],
1869                               elem->weights[cnt].w[i]->mborder[cnt]);
1870
1871       /* And add the buffer content.  */
1872       obstack_1grow (pool, len);
1873       obstack_grow (pool, buf, len);
1874     }
1875
1876   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1877 }
1878
1879
1880 static int32_t
1881 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1882                  struct element_t *elem)
1883 {
1884   size_t cnt;
1885   int32_t retval;
1886
1887   /* Optimize the use of UNDEFINED.  */
1888   if (elem == &collate->undefined)
1889     /* The weights are already inserted.  */
1890     return 0;
1891
1892   /* This byte can start exactly one collation element and this is
1893      a single byte.  We can directly give the index to the weights.  */
1894   retval = obstack_object_size (pool) / sizeof (int32_t);
1895
1896   /* Construct the weight.  */
1897   for (cnt = 0; cnt < nrules; ++cnt)
1898     {
1899       int32_t buf[elem->weights[cnt].cnt];
1900       int i;
1901       int32_t j;
1902
1903       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1904         if (elem->weights[cnt].w[i] != NULL)
1905           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1906
1907       /* And add the buffer content.  */
1908       obstack_int32_grow (pool, j);
1909
1910       obstack_grow (pool, buf, j * sizeof (int32_t));
1911     }
1912
1913   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1914 }
1915
1916
1917 void
1918 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
1919                 const char *output_path)
1920 {
1921   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1922   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
1923   struct iovec iov[2 + nelems];
1924   struct locale_file data;
1925   uint32_t idx[nelems];
1926   size_t cnt;
1927   size_t ch;
1928   int32_t tablemb[256];
1929   struct obstack weightpool;
1930   struct obstack extrapool;
1931   struct obstack indirectpool;
1932   struct section_list *sect;
1933   struct collidx_table tablewc;
1934   uint32_t elem_size;
1935   uint32_t *elem_table;
1936   int i;
1937   struct element_t *runp;
1938
1939   data.magic = LIMAGIC (LC_COLLATE);
1940   data.n = nelems;
1941   iov[0].iov_base = (void *) &data;
1942   iov[0].iov_len = sizeof (data);
1943
1944   iov[1].iov_base = (void *) idx;
1945   iov[1].iov_len = sizeof (idx);
1946
1947   idx[0] = iov[0].iov_len + iov[1].iov_len;
1948   cnt = 0;
1949
1950   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
1951   iov[2 + cnt].iov_base = &nrules;
1952   iov[2 + cnt].iov_len = sizeof (uint32_t);
1953   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1954   ++cnt;
1955
1956   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
1957   if (collate == NULL)
1958     {
1959       int32_t dummy = 0;
1960
1961       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1962         {
1963           /* The words have to be handled specially.  */
1964           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
1965             {
1966               iov[2 + cnt].iov_base = &dummy;
1967               iov[2 + cnt].iov_len = sizeof (int32_t);
1968             }
1969           else
1970             {
1971               iov[2 + cnt].iov_base = NULL;
1972               iov[2 + cnt].iov_len = 0;
1973             }
1974
1975           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
1976             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
1977           ++cnt;
1978         }
1979
1980       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
1981
1982       write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
1983
1984       return;
1985     }
1986
1987   obstack_init (&weightpool);
1988   obstack_init (&extrapool);
1989   obstack_init (&indirectpool);
1990
1991   /* Since we are using the sign of an integer to mark indirection the
1992      offsets in the arrays we are indirectly referring to must not be
1993      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
1994   obstack_int32_grow (&extrapool, 0);
1995   obstack_int32_grow (&indirectpool, 0);
1996
1997   /* Prepare the ruleset table.  */
1998   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
1999     if (sect->rules != NULL && sect->ruleidx == i)
2000       {
2001         int j;
2002
2003         obstack_make_room (&weightpool, nrules);
2004
2005         for (j = 0; j < nrules; ++j)
2006           obstack_1grow_fast (&weightpool, sect->rules[j]);
2007         ++i;
2008       }
2009   /* And align the output.  */
2010   i = (nrules * i) % __alignof__ (int32_t);
2011   if (i > 0)
2012     do
2013       obstack_1grow (&weightpool, '\0');
2014     while (++i < __alignof__ (int32_t));
2015
2016   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2017   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2018   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2019   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2020   ++cnt;
2021
2022   /* Generate the 8-bit table.  Walk through the lists of sequences
2023      starting with the same byte and add them one after the other to
2024      the table.  In case we have more than one sequence starting with
2025      the same byte we have to use extra indirection.
2026
2027      First add a record for the NUL byte.  This entry will never be used
2028      so it does not matter.  */
2029   tablemb[0] = 0;
2030
2031   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2032      will probably be used more than once it is good to store the
2033      weights only once.  */
2034   if (collate->undefined.used_in_level != 0)
2035     output_weight (&weightpool, collate, &collate->undefined);
2036
2037   for (ch = 1; ch < 256; ++ch)
2038     if (collate->mbheads[ch]->mbnext == NULL
2039         && collate->mbheads[ch]->nmbs <= 1)
2040       {
2041         tablemb[ch] = output_weight (&weightpool, collate,
2042                                      collate->mbheads[ch]);
2043       }
2044     else
2045       {
2046         /* The entries in the list are sorted by length and then
2047            alphabetically.  This is the order in which we will add the
2048            elements to the collation table.  This allows simply walking
2049            the table in sequence and stopping at the first matching
2050            entry.  Since the longer sequences are coming first in the
2051            list they have the possibility to match first, just as it
2052            has to be.  In the worst case we are walking to the end of
2053            the list where we put, if no singlebyte sequence is defined
2054            in the locale definition, the weights for UNDEFINED.
2055
2056            To reduce the length of the search list we compress them a bit.
2057            This happens by collecting sequences of consecutive byte
2058            sequences in one entry (having a begin and end byte sequence)
2059            and add only one index into the weight table.  We can find the
2060            consecutive entries since they are also consecutive in the list.  */
2061         struct element_t *runp = collate->mbheads[ch];
2062         struct element_t *lastp;
2063
2064         assert ((obstack_object_size (&extrapool)
2065                  & (__alignof__ (int32_t) - 1)) == 0);
2066
2067         tablemb[ch] = -obstack_object_size (&extrapool);
2068
2069         do
2070           {
2071             /* Store the current index in the weight table.  We know that
2072                the current position in the `extrapool' is aligned on a
2073                32-bit address.  */
2074             int32_t weightidx;
2075             int added;
2076
2077             /* Find out whether this is a single entry or we have more than
2078                one consecutive entry.  */
2079             if (runp->mbnext != NULL
2080                 && runp->nmbs == runp->mbnext->nmbs
2081                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2082                 && (runp->mbs[runp->nmbs - 1]
2083                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2084               {
2085                 int i;
2086                 struct element_t *series_startp = runp;
2087                 struct element_t *curp;
2088
2089                 /* Compute how much space we will need.  */
2090                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2091                           + __alignof__ (int32_t) - 1)
2092                          & ~(__alignof__ (int32_t) - 1));
2093                 assert ((obstack_object_size (&extrapool)
2094                          & (__alignof__ (int32_t) - 1)) == 0);
2095                 obstack_make_room (&extrapool, added);
2096
2097                 /* More than one consecutive entry.  We mark this by having
2098                    a negative index into the indirect table.  */
2099                 obstack_int32_grow_fast (&extrapool,
2100                                          -(obstack_object_size (&indirectpool)
2101                                            / sizeof (int32_t)));
2102
2103                 /* Now search first the end of the series.  */
2104                 do
2105                   runp = runp->mbnext;
2106                 while (runp->mbnext != NULL
2107                        && runp->nmbs == runp->mbnext->nmbs
2108                        && memcmp (runp->mbs, runp->mbnext->mbs,
2109                                   runp->nmbs - 1) == 0
2110                        && (runp->mbs[runp->nmbs - 1]
2111                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2112
2113                 /* Now walk backward from here to the beginning.  */
2114                 curp = runp;
2115
2116                 assert (runp->nmbs <= 256);
2117                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2118                 for (i = 1; i < curp->nmbs; ++i)
2119                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2120
2121                 /* Now find the end of the consecutive sequence and
2122                    add all the indices in the indirect pool.  */
2123                 do
2124                   {
2125                     weightidx = output_weight (&weightpool, collate, curp);
2126                     obstack_int32_grow (&indirectpool, weightidx);
2127
2128                     curp = curp->mblast;
2129                   }
2130                 while (curp != series_startp);
2131
2132                 /* Add the final weight.  */
2133                 weightidx = output_weight (&weightpool, collate, curp);
2134                 obstack_int32_grow (&indirectpool, weightidx);
2135
2136                 /* And add the end byte sequence.  Without length this
2137                    time.  */
2138                 for (i = 1; i < curp->nmbs; ++i)
2139                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2140               }
2141             else
2142               {
2143                 /* A single entry.  Simply add the index and the length and
2144                    string (except for the first character which is already
2145                    tested for).  */
2146                 int i;
2147
2148                 /* Output the weight info.  */
2149                 weightidx = output_weight (&weightpool, collate, runp);
2150
2151                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2152                           + __alignof__ (int32_t) - 1)
2153                          & ~(__alignof__ (int32_t) - 1));
2154                 assert ((obstack_object_size (&extrapool)
2155                          & (__alignof__ (int32_t) - 1)) == 0);
2156                 obstack_make_room (&extrapool, added);
2157
2158                 obstack_int32_grow_fast (&extrapool, weightidx);
2159                 assert (runp->nmbs <= 256);
2160                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2161
2162                 for (i = 1; i < runp->nmbs; ++i)
2163                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2164               }
2165
2166             /* Add alignment bytes if necessary.  */
2167             while ((obstack_object_size (&extrapool)
2168                     & (__alignof__ (int32_t) - 1)) != 0)
2169               obstack_1grow_fast (&extrapool, '\0');
2170
2171             /* Next entry.  */
2172             lastp = runp;
2173             runp = runp->mbnext;
2174           }
2175         while (runp != NULL);
2176
2177         assert ((obstack_object_size (&extrapool)
2178                  & (__alignof__ (int32_t) - 1)) == 0);
2179
2180         /* If the final entry in the list is not a single character we
2181            add an UNDEFINED entry here.  */
2182         if (lastp->nmbs != 1)
2183           {
2184             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2185                          & ~(__alignof__ (int32_t) - 1));
2186             obstack_make_room (&extrapool, added);
2187
2188             obstack_int32_grow_fast (&extrapool, 0);
2189             /* XXX What rule? We just pick the first.  */
2190             obstack_1grow_fast (&extrapool, 0);
2191             /* Length is zero.  */
2192             obstack_1grow_fast (&extrapool, 0);
2193
2194             /* Add alignment bytes if necessary.  */
2195             while ((obstack_object_size (&extrapool)
2196                     & (__alignof__ (int32_t) - 1)) != 0)
2197               obstack_1grow_fast (&extrapool, '\0');
2198           }
2199       }
2200
2201   /* Add padding to the tables if necessary.  */
2202   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2203          != 0)
2204     obstack_1grow (&weightpool, 0);
2205
2206   /* Now add the four tables.  */
2207   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2208   iov[2 + cnt].iov_base = tablemb;
2209   iov[2 + cnt].iov_len = sizeof (tablemb);
2210   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2211   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2212   ++cnt;
2213
2214   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2215   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2216   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2217   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2218   ++cnt;
2219
2220   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2221   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2222   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2223   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2224   ++cnt;
2225
2226   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2227   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2228   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2229   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2230   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2231   ++cnt;
2232
2233
2234   /* Now the same for the wide character table.  We need to store some
2235      more information here.  */
2236   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2237   iov[2 + cnt].iov_base = NULL;
2238   iov[2 + cnt].iov_len = 0;
2239   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2240   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2241   ++cnt;
2242
2243   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2244   iov[2 + cnt].iov_base = NULL;
2245   iov[2 + cnt].iov_len = 0;
2246   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2247   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2248   ++cnt;
2249
2250   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2251   iov[2 + cnt].iov_base = NULL;
2252   iov[2 + cnt].iov_len = 0;
2253   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2254   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2255   ++cnt;
2256
2257   /* Since we are using the sign of an integer to mark indirection the
2258      offsets in the arrays we are indirectly referring to must not be
2259      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2260   obstack_int32_grow (&extrapool, 0);
2261   obstack_int32_grow (&indirectpool, 0);
2262
2263   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2264      will probably be used more than once it is good to store the
2265      weights only once.  */
2266   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2267     abort ();
2268
2269   /* Generate the table.  Walk through the lists of sequences starting
2270      with the same wide character and add them one after the other to
2271      the table.  In case we have more than one sequence starting with
2272      the same byte we have to use extra indirection.  */
2273   {
2274     auto void add_to_tablewc (uint32_t ch, struct element_t *runp);
2275
2276     void add_to_tablewc (uint32_t ch, struct element_t *runp)
2277       {
2278         if (runp->wcnext == NULL && runp->nwcs == 1)
2279           {
2280             int32_t weigthidx = output_weightwc (&weightpool, collate, runp);
2281             collidx_table_add (&tablewc, ch, weigthidx);
2282           }
2283         else
2284           {
2285             /* As for the singlebyte table, we recognize sequences and
2286                compress them.  */
2287             struct element_t *lastp;
2288
2289             collidx_table_add (&tablewc, ch,
2290                                -(obstack_object_size (&extrapool) / sizeof (uint32_t)));
2291
2292             do
2293               {
2294                 /* Store the current index in the weight table.  We know that
2295                    the current position in the `extrapool' is aligned on a
2296                    32-bit address.  */
2297                 int32_t weightidx;
2298                 int added;
2299
2300                 /* Find out whether this is a single entry or we have more than
2301                    one consecutive entry.  */
2302                 if (runp->wcnext != NULL
2303                     && runp->nwcs == runp->wcnext->nwcs
2304                     && wmemcmp ((wchar_t *) runp->wcs,
2305                                 (wchar_t *)runp->wcnext->wcs,
2306                                 runp->nwcs - 1) == 0
2307                     && (runp->wcs[runp->nwcs - 1]
2308                         == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2309                   {
2310                     int i;
2311                     struct element_t *series_startp = runp;
2312                     struct element_t *curp;
2313
2314                     /* Now add first the initial byte sequence.  */
2315                     added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2316                     if (sizeof (int32_t) == sizeof (int))
2317                       obstack_make_room (&extrapool, added);
2318
2319                     /* More than one consecutive entry.  We mark this by having
2320                        a negative index into the indirect table.  */
2321                     obstack_int32_grow_fast (&extrapool,
2322                                              -(obstack_object_size (&indirectpool)
2323                                                / sizeof (int32_t)));
2324                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2325
2326                     do
2327                       runp = runp->wcnext;
2328                     while (runp->wcnext != NULL
2329                            && runp->nwcs == runp->wcnext->nwcs
2330                            && wmemcmp ((wchar_t *) runp->wcs,
2331                                        (wchar_t *)runp->wcnext->wcs,
2332                                        runp->nwcs - 1) == 0
2333                            && (runp->wcs[runp->nwcs - 1]
2334                                == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2335
2336                     /* Now walk backward from here to the beginning.  */
2337                     curp = runp;
2338
2339                     for (i = 1; i < runp->nwcs; ++i)
2340                       obstack_int32_grow_fast (&extrapool, curp->wcs[i]);
2341
2342                     /* Now find the end of the consecutive sequence and
2343                        add all the indices in the indirect pool.  */
2344                     do
2345                       {
2346                         weightidx = output_weightwc (&weightpool, collate,
2347                                                      curp);
2348                         obstack_int32_grow (&indirectpool, weightidx);
2349
2350                         curp = curp->wclast;
2351                       }
2352                     while (curp != series_startp);
2353
2354                     /* Add the final weight.  */
2355                     weightidx = output_weightwc (&weightpool, collate, curp);
2356                     obstack_int32_grow (&indirectpool, weightidx);
2357
2358                     /* And add the end byte sequence.  Without length this
2359                        time.  */
2360                     for (i = 1; i < curp->nwcs; ++i)
2361                       obstack_int32_grow (&extrapool, curp->wcs[i]);
2362                   }
2363                 else
2364                   {
2365                     /* A single entry.  Simply add the index and the length and
2366                        string (except for the first character which is already
2367                        tested for).  */
2368                     int i;
2369
2370                     /* Output the weight info.  */
2371                     weightidx = output_weightwc (&weightpool, collate, runp);
2372
2373                     added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2374                     if (sizeof (int) == sizeof (int32_t))
2375                       obstack_make_room (&extrapool, added);
2376
2377                     obstack_int32_grow_fast (&extrapool, weightidx);
2378                     obstack_int32_grow_fast (&extrapool, runp->nwcs - 1);
2379                     for (i = 1; i < runp->nwcs; ++i)
2380                       obstack_int32_grow_fast (&extrapool, runp->wcs[i]);
2381                   }
2382
2383                 /* Next entry.  */
2384                 lastp = runp;
2385                 runp = runp->wcnext;
2386               }
2387             while (runp != NULL);
2388           }
2389       }
2390
2391     tablewc.p = 6;
2392     tablewc.q = 10;
2393     collidx_table_init (&tablewc);
2394
2395     wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2396
2397     collidx_table_finalize (&tablewc);
2398   }
2399
2400   /* Now add the four tables.  */
2401   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2402   iov[2 + cnt].iov_base = tablewc.result;
2403   iov[2 + cnt].iov_len = tablewc.result_size;
2404   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2405   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2406   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2407   ++cnt;
2408
2409   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2410   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2411   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2412   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2413   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2414   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2415   ++cnt;
2416
2417   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2418   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2419   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2420   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2421   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2422   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2423   ++cnt;
2424
2425   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2426   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2427   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2428   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2429   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2430   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2431   ++cnt;
2432
2433
2434   /* Finally write the table with collation element names out.  It is
2435      a hash table with a simple function which gets the name of the
2436      character as the input.  One character might have many names.  The
2437      value associated with the name is an index into the weight table
2438      where we are then interested in the first-level weight value.
2439
2440      To determine how large the table should be we are counting the
2441      elements we have to put in.  Since we are using internal chaining
2442      using a secondary hash function we have to make the table a bit
2443      larger to avoid extremely long search times.  We can achieve
2444      good results with a 40% larger table than there are entries.  */
2445   elem_size = 0;
2446   runp = collate->start;
2447   while (runp != NULL)
2448     {
2449       if (runp->mbs != NULL && runp->weights != NULL)
2450         /* Yep, the element really counts.  */
2451         ++elem_size;
2452
2453       runp = runp->next;
2454     }
2455   /* Add 40% and find the next prime number.  */
2456   elem_size = MIN (next_prime (elem_size * 1.4), 257);
2457
2458   /* Allocate the table.  Each entry consists of two words: the hash
2459      value and an index in a secondary table which provides the index
2460      into the weight table and the string itself (so that a match can
2461      be determined).  */
2462   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2463                                            elem_size * 2 * sizeof (uint32_t));
2464   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2465
2466   /* Now add the elements.  */
2467   runp = collate->start;
2468   while (runp != NULL)
2469     {
2470       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2471         {
2472           /* Compute the hash value of the name.  */
2473           uint32_t namelen = strlen (runp->name);
2474           uint32_t hash = elem_hash (runp->name, namelen);
2475           size_t idx = hash % elem_size;
2476
2477           if (elem_table[idx * 2] != 0)
2478             {
2479               /* The spot is already taken.  Try iterating using the value
2480                  from the secondary hashing function.  */
2481               size_t iter = hash % (elem_size - 2);
2482
2483               do
2484                 {
2485                   idx += iter;
2486                   if (idx >= elem_size)
2487                     idx -= elem_size;
2488                 }
2489               while (elem_table[idx * 2] != 0);
2490             }
2491           /* This is the spot where we will insert the value.  */
2492           elem_table[idx * 2] = hash;
2493           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2494
2495           /* The string itself including length.  */
2496           obstack_1grow (&extrapool, namelen);
2497           obstack_grow (&extrapool, runp->name, namelen);
2498
2499           /* And the multibyte representation.  */
2500           obstack_1grow (&extrapool, runp->nmbs);
2501           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2502
2503           /* And align again to 32 bits.  */
2504           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2505             obstack_grow (&extrapool, "\0\0",
2506                           (sizeof (int32_t)
2507                            - ((1 + namelen + 1 + runp->nmbs)
2508                               % sizeof (int32_t))));
2509
2510           /* Now some 32-bit values: multibyte collation sequence,
2511              wide char string (including length), and wide char
2512              collation sequence.  */
2513           obstack_int32_grow (&extrapool, runp->mbseqorder);
2514
2515           obstack_int32_grow (&extrapool, runp->nwcs);
2516           obstack_grow (&extrapool, runp->wcs,
2517                         runp->nwcs * sizeof (uint32_t));
2518
2519           obstack_int32_grow (&extrapool, runp->wcseqorder);
2520         }
2521
2522       runp = runp->next;
2523     }
2524
2525   /* Prepare to write out this data.  */
2526   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2527   iov[2 + cnt].iov_base = &elem_size;
2528   iov[2 + cnt].iov_len = sizeof (int32_t);
2529   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2530   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2531   ++cnt;
2532
2533   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2534   iov[2 + cnt].iov_base = elem_table;
2535   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2536   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2537   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2538   ++cnt;
2539
2540   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2541   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2542   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2543   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2544   ++cnt;
2545
2546   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2547   iov[2 + cnt].iov_base = collate->mbseqorder;
2548   iov[2 + cnt].iov_len = 256;
2549   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2550   ++cnt;
2551
2552   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2553   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2554   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2555   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2556   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2557   ++cnt;
2558
2559   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2560   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2561   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2562   ++cnt;
2563
2564   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2565
2566   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
2567
2568   obstack_free (&weightpool, NULL);
2569   obstack_free (&extrapool, NULL);
2570   obstack_free (&indirectpool, NULL);
2571 }
2572
2573
2574 void
2575 collate_read (struct linereader *ldfile, struct localedef_t *result,
2576               const struct charmap_t *charmap, const char *repertoire_name,
2577               int ignore_content)
2578 {
2579   struct repertoire_t *repertoire = NULL;
2580   struct locale_collate_t *collate;
2581   struct token *now;
2582   struct token *arg = NULL;
2583   enum token_t nowtok;
2584   enum token_t was_ellipsis = tok_none;
2585   struct localedef_t *copy_locale = NULL;
2586   /* Parsing state:
2587      0 - start
2588      1 - between `order-start' and `order-end'
2589      2 - after `order-end'
2590      3 - after `reorder-after', waiting for `reorder-end'
2591      4 - after `reorder-end'
2592      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2593      6 - after `reorder-sections-end'
2594   */
2595   int state = 0;
2596
2597   /* Get the repertoire we have to use.  */
2598   if (repertoire_name != NULL)
2599     repertoire = repertoire_read (repertoire_name);
2600
2601   /* The rest of the line containing `LC_COLLATE' must be free.  */
2602   lr_ignore_rest (ldfile, 1);
2603
2604   do
2605     {
2606       now = lr_token (ldfile, charmap, result, NULL, verbose);
2607       nowtok = now->tok;
2608     }
2609   while (nowtok == tok_eol);
2610
2611   if (nowtok == tok_copy)
2612     {
2613       state = 2;
2614       now = lr_token (ldfile, charmap, result, NULL, verbose);
2615       if (now->tok != tok_string)
2616         {
2617           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2618
2619         skip_category:
2620           do
2621             now = lr_token (ldfile, charmap, result, NULL, verbose);
2622           while (now->tok != tok_eof && now->tok != tok_end);
2623
2624           if (now->tok != tok_eof
2625               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2626                   now->tok == tok_eof))
2627             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2628           else if (now->tok != tok_lc_collate)
2629             {
2630               lr_error (ldfile, _("\
2631 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2632               lr_ignore_rest (ldfile, 0);
2633             }
2634           else
2635             lr_ignore_rest (ldfile, 1);
2636
2637           return;
2638         }
2639
2640       if (! ignore_content)
2641         {
2642           /* Get the locale definition.  */
2643           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2644                                      repertoire_name, charmap, NULL);
2645           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2646             {
2647               /* Not yet loaded.  So do it now.  */
2648               if (locfile_read (copy_locale, charmap) != 0)
2649                 goto skip_category;
2650             }
2651         }
2652
2653       lr_ignore_rest (ldfile, 1);
2654
2655       now = lr_token (ldfile, charmap, result, NULL, verbose);
2656       nowtok = now->tok;
2657     }
2658
2659   /* Prepare the data structures.  */
2660   collate_startup (ldfile, result, copy_locale, ignore_content);
2661   collate = result->categories[LC_COLLATE].collate;
2662
2663   while (1)
2664     {
2665       char ucs4buf[10];
2666       char *symstr;
2667       size_t symlen;
2668
2669       /* Of course we don't proceed beyond the end of file.  */
2670       if (nowtok == tok_eof)
2671         break;
2672
2673       /* Ignore empty lines.  */
2674       if (nowtok == tok_eol)
2675         {
2676           now = lr_token (ldfile, charmap, result, NULL, verbose);
2677           nowtok = now->tok;
2678           continue;
2679         }
2680
2681       switch (nowtok)
2682         {
2683         case tok_copy:
2684           /* Allow copying other locales.  */
2685           now = lr_token (ldfile, charmap, result, NULL, verbose);
2686           if (now->tok != tok_string)
2687             goto err_label;
2688
2689           if (! ignore_content)
2690             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2691                          charmap, result);
2692
2693           lr_ignore_rest (ldfile, 1);
2694           break;
2695
2696         case tok_coll_weight_max:
2697           /* Ignore the rest of the line if we don't need the input of
2698              this line.  */
2699           if (ignore_content)
2700             {
2701               lr_ignore_rest (ldfile, 0);
2702               break;
2703             }
2704
2705           if (state != 0)
2706             goto err_label;
2707
2708           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2709           if (arg->tok != tok_number)
2710             goto err_label;
2711           if (collate->col_weight_max != -1)
2712             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2713                       "LC_COLLATE", "col_weight_max");
2714           else
2715             collate->col_weight_max = arg->val.num;
2716           lr_ignore_rest (ldfile, 1);
2717           break;
2718
2719         case tok_section_symbol:
2720           /* Ignore the rest of the line if we don't need the input of
2721              this line.  */
2722           if (ignore_content)
2723             {
2724               lr_ignore_rest (ldfile, 0);
2725               break;
2726             }
2727
2728           if (state != 0)
2729             goto err_label;
2730
2731           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2732           if (arg->tok != tok_bsymbol)
2733             goto err_label;
2734           else if (!ignore_content)
2735             {
2736               /* Check whether this section is already known.  */
2737               struct section_list *known = collate->sections;
2738               while (known != NULL)
2739                 {
2740                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2741                     break;
2742                   known = known->next;
2743                 }
2744
2745               if (known != NULL)
2746                 {
2747                   lr_error (ldfile,
2748                             _("%s: duplicate declaration of section `%s'"),
2749                             "LC_COLLATE", arg->val.str.startmb);
2750                   free (arg->val.str.startmb);
2751                 }
2752               else
2753                 collate->sections = make_seclist_elem (collate,
2754                                                        arg->val.str.startmb,
2755                                                        collate->sections);
2756
2757               lr_ignore_rest (ldfile, known == NULL);
2758             }
2759           else
2760             {
2761               free (arg->val.str.startmb);
2762               lr_ignore_rest (ldfile, 0);
2763             }
2764           break;
2765
2766         case tok_collating_element:
2767           /* Ignore the rest of the line if we don't need the input of
2768              this line.  */
2769           if (ignore_content)
2770             {
2771               lr_ignore_rest (ldfile, 0);
2772               break;
2773             }
2774
2775           if (state != 0 && state != 2)
2776             goto err_label;
2777
2778           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2779           if (arg->tok != tok_bsymbol)
2780             goto err_label;
2781           else
2782             {
2783               const char *symbol = arg->val.str.startmb;
2784               size_t symbol_len = arg->val.str.lenmb;
2785
2786               /* Next the `from' keyword.  */
2787               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2788               if (arg->tok != tok_from)
2789                 {
2790                   free ((char *) symbol);
2791                   goto err_label;
2792                 }
2793
2794               ldfile->return_widestr = 1;
2795               ldfile->translate_strings = 1;
2796
2797               /* Finally the string with the replacement.  */
2798               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2799
2800               ldfile->return_widestr = 0;
2801               ldfile->translate_strings = 0;
2802
2803               if (arg->tok != tok_string)
2804                 goto err_label;
2805
2806               if (!ignore_content && symbol != NULL)
2807                 {
2808                   /* The name is already defined.  */
2809                   if (check_duplicate (ldfile, collate, charmap,
2810                                        repertoire, symbol, symbol_len))
2811                     goto col_elem_free;
2812
2813                   if (arg->val.str.startmb != NULL)
2814                     insert_entry (&collate->elem_table, symbol, symbol_len,
2815                                   new_element (collate,
2816                                                arg->val.str.startmb,
2817                                                arg->val.str.lenmb - 1,
2818                                                arg->val.str.startwc,
2819                                                symbol, symbol_len, 0));
2820                 }
2821               else
2822                 {
2823                 col_elem_free:
2824                   if (symbol != NULL)
2825                     free ((char *) symbol);
2826                   if (arg->val.str.startmb != NULL)
2827                     free (arg->val.str.startmb);
2828                   if (arg->val.str.startwc != NULL)
2829                     free (arg->val.str.startwc);
2830                 }
2831               lr_ignore_rest (ldfile, 1);
2832             }
2833           break;
2834
2835         case tok_collating_symbol:
2836           /* Ignore the rest of the line if we don't need the input of
2837              this line.  */
2838           if (ignore_content)
2839             {
2840               lr_ignore_rest (ldfile, 0);
2841               break;
2842             }
2843
2844           if (state != 0 && state != 2)
2845             goto err_label;
2846
2847           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2848           if (arg->tok != tok_bsymbol)
2849             goto err_label;
2850           else
2851             {
2852               char *symbol = arg->val.str.startmb;
2853               size_t symbol_len = arg->val.str.lenmb;
2854               char *endsymbol = NULL;
2855               size_t endsymbol_len = 0;
2856               enum token_t ellipsis = tok_none;
2857
2858               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2859               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2860                 {
2861                   ellipsis = arg->tok;
2862
2863                   arg = lr_token (ldfile, charmap, result, repertoire,
2864                                   verbose);
2865                   if (arg->tok != tok_bsymbol)
2866                     {
2867                       free (symbol);
2868                       goto err_label;
2869                     }
2870
2871                   endsymbol = arg->val.str.startmb;
2872                   endsymbol_len = arg->val.str.lenmb;
2873
2874                   lr_ignore_rest (ldfile, 1);
2875                 }
2876               else if (arg->tok != tok_eol)
2877                 {
2878                   free (symbol);
2879                   goto err_label;
2880                 }
2881
2882               if (!ignore_content)
2883                 {
2884                   if (symbol == NULL
2885                       || (ellipsis != tok_none && endsymbol == NULL))
2886                     {
2887                       lr_error (ldfile, _("\
2888 %s: unknown character in collating symbol name"),
2889                                 "LC_COLLATE");
2890                       goto col_sym_free;
2891                     }
2892                   else if (ellipsis == tok_none)
2893                     {
2894                       /* A single symbol, no ellipsis.  */
2895                       if (check_duplicate (ldfile, collate, charmap,
2896                                            repertoire, symbol, symbol_len))
2897                         /* The name is already defined.  */
2898                         goto col_sym_free;
2899
2900                       insert_entry (&collate->sym_table, symbol, symbol_len,
2901                                     new_symbol (collate, symbol, symbol_len));
2902                     }
2903                   else if (symbol_len != endsymbol_len)
2904                     {
2905                     col_sym_inv_range:
2906                       lr_error (ldfile,
2907                                 _("invalid names for character range"));
2908                       goto col_sym_free;
2909                     }
2910                   else
2911                     {
2912                       /* Oh my, we have to handle an ellipsis.  First, as
2913                          usual, determine the common prefix and then
2914                          convert the rest into a range.  */
2915                       size_t prefixlen;
2916                       unsigned long int from;
2917                       unsigned long int to;
2918                       char *endp;
2919
2920                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2921                         if (symbol[prefixlen] != endsymbol[prefixlen])
2922                           break;
2923
2924                       /* Convert the rest into numbers.  */
2925                       symbol[symbol_len] = '\0';
2926                       from = strtoul (&symbol[prefixlen], &endp,
2927                                       ellipsis == tok_ellipsis2 ? 16 : 10);
2928                       if (*endp != '\0')
2929                         goto col_sym_inv_range;
2930
2931                       endsymbol[symbol_len] = '\0';
2932                       to = strtoul (&endsymbol[prefixlen], &endp,
2933                                     ellipsis == tok_ellipsis2 ? 16 : 10);
2934                       if (*endp != '\0')
2935                         goto col_sym_inv_range;
2936
2937                       if (from > to)
2938                         goto col_sym_inv_range;
2939
2940                       /* Now loop over all entries.  */
2941                       while (from <= to)
2942                         {
2943                           char *symbuf;
2944
2945                           symbuf = (char *) obstack_alloc (&collate->mempool,
2946                                                            symbol_len + 1);
2947
2948                           /* Create the name.  */
2949                           sprintf (symbuf,
2950                                    ellipsis == tok_ellipsis2
2951                                    ? "%.*s%.*lX" : "%.*s%.*lu",
2952                                    (int) prefixlen, symbol,
2953                                    (int) (symbol_len - prefixlen), from);
2954
2955                           if (check_duplicate (ldfile, collate, charmap,
2956                                                repertoire, symbuf, symbol_len))
2957                             /* The name is already defined.  */
2958                             goto col_sym_free;
2959
2960                           insert_entry (&collate->sym_table, symbuf,
2961                                         symbol_len,
2962                                         new_symbol (collate, symbuf,
2963                                                     symbol_len));
2964
2965                           /* Increment the counter.  */
2966                           ++from;
2967                         }
2968
2969                       goto col_sym_free;
2970                     }
2971                 }
2972               else
2973                 {
2974                 col_sym_free:
2975                   if (symbol != NULL)
2976                     free (symbol);
2977                   if (endsymbol != NULL)
2978                     free (endsymbol);
2979                 }
2980             }
2981           break;
2982
2983         case tok_symbol_equivalence:
2984           /* Ignore the rest of the line if we don't need the input of
2985              this line.  */
2986           if (ignore_content)
2987             {
2988               lr_ignore_rest (ldfile, 0);
2989               break;
2990             }
2991
2992           if (state != 0)
2993             goto err_label;
2994
2995           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2996           if (arg->tok != tok_bsymbol)
2997             goto err_label;
2998           else
2999             {
3000               const char *newname = arg->val.str.startmb;
3001               size_t newname_len = arg->val.str.lenmb;
3002               const char *symname;
3003               size_t symname_len;
3004               struct symbol_t *symval;
3005
3006               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3007               if (arg->tok != tok_bsymbol)
3008                 {
3009                   if (newname != NULL)
3010                     free ((char *) newname);
3011                   goto err_label;
3012                 }
3013
3014               symname = arg->val.str.startmb;
3015               symname_len = arg->val.str.lenmb;
3016
3017               if (newname == NULL)
3018                 {
3019                   lr_error (ldfile, _("\
3020 %s: unknown character in equivalent definition name"),
3021                             "LC_COLLATE");
3022
3023                 sym_equiv_free:
3024                   if (newname != NULL)
3025                     free ((char *) newname);
3026                   if (symname != NULL)
3027                     free ((char *) symname);
3028                   break;
3029                 }
3030               if (symname == NULL)
3031                 {
3032                   lr_error (ldfile, _("\
3033 %s: unknown character in equivalent definition value"),
3034                             "LC_COLLATE");
3035                   goto sym_equiv_free;
3036                 }
3037
3038               /* See whether the symbol name is already defined.  */
3039               if (find_entry (&collate->sym_table, symname, symname_len,
3040                               (void **) &symval) != 0)
3041                 {
3042                   lr_error (ldfile, _("\
3043 %s: unknown symbol `%s' in equivalent definition"),
3044                             "LC_COLLATE", symname);
3045                   goto col_sym_free;
3046                 }
3047
3048               if (insert_entry (&collate->sym_table,
3049                                 newname, newname_len, symval) < 0)
3050                 {
3051                   lr_error (ldfile, _("\
3052 error while adding equivalent collating symbol"));
3053                   goto sym_equiv_free;
3054                 }
3055
3056               free ((char *) symname);
3057             }
3058           lr_ignore_rest (ldfile, 1);
3059           break;
3060
3061         case tok_script:
3062           /* We get told about the scripts we know.  */
3063           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3064           if (arg->tok != tok_bsymbol)
3065             goto err_label;
3066           else
3067             {
3068               struct section_list *runp = collate->known_sections;
3069               char *name;
3070
3071               while (runp != NULL)
3072                 if (strncmp (runp->name, arg->val.str.startmb,
3073                              arg->val.str.lenmb) == 0
3074                     && runp->name[arg->val.str.lenmb] == '\0')
3075                   break;
3076                 else
3077                   runp = runp->def_next;
3078
3079               if (runp != NULL)
3080                 {
3081                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3082                             runp->name);
3083                   lr_ignore_rest (ldfile, 0);
3084                   break;
3085                 }
3086
3087               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3088               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3089               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3090               name[arg->val.str.lenmb] = '\0';
3091               runp->name = name;
3092
3093               runp->def_next = collate->known_sections;
3094               collate->known_sections = runp;
3095             }
3096           lr_ignore_rest (ldfile, 1);
3097           break;
3098
3099         case tok_order_start:
3100           /* Ignore the rest of the line if we don't need the input of
3101              this line.  */
3102           if (ignore_content)
3103             {
3104               lr_ignore_rest (ldfile, 0);
3105               break;
3106             }
3107
3108           if (state != 0 && state != 1)
3109             goto err_label;
3110           state = 1;
3111
3112           /* The 14652 draft does not specify whether all `order_start' lines
3113              must contain the same number of sort-rules, but 14651 does.  So
3114              we require this here as well.  */
3115           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3116           if (arg->tok == tok_bsymbol)
3117             {
3118               /* This better should be a section name.  */
3119               struct section_list *sp = collate->known_sections;
3120               while (sp != NULL
3121                      && (sp->name == NULL
3122                          || strncmp (sp->name, arg->val.str.startmb,
3123                                      arg->val.str.lenmb) != 0
3124                          || sp->name[arg->val.str.lenmb] != '\0'))
3125                 sp = sp->def_next;
3126
3127               if (sp == NULL)
3128                 {
3129                   lr_error (ldfile, _("\
3130 %s: unknown section name `%s'"),
3131                             "LC_COLLATE", arg->val.str.startmb);
3132                   /* We use the error section.  */
3133                   collate->current_section = &collate->error_section;
3134
3135                   if (collate->error_section.first == NULL)
3136                     {
3137                       /* Insert &collate->error_section at the end of
3138                          the collate->sections list.  */
3139                       if (collate->sections == NULL)
3140                         collate->sections = &collate->error_section;
3141                       else
3142                         {
3143                           sp = collate->sections;
3144                           while (sp->next != NULL)
3145                             sp = sp->next;
3146
3147                           sp->next = &collate->error_section;
3148                         }
3149                       collate->error_section.next = NULL;
3150                     }
3151                 }
3152               else
3153                 {
3154                   /* One should not be allowed to open the same
3155                      section twice.  */
3156                   if (sp->first != NULL)
3157                     lr_error (ldfile, _("\
3158 %s: multiple order definitions for section `%s'"),
3159                               "LC_COLLATE", sp->name);
3160                   else
3161                     {
3162                       /* Insert sp in the collate->sections list,
3163                          right after collate->current_section.  */
3164                       if (collate->current_section == NULL)
3165                         collate->current_section = sp;
3166                       else
3167                         {
3168                           sp->next = collate->current_section->next;
3169                           collate->current_section->next = sp;
3170                         }
3171                     }
3172
3173                   /* Next should come the end of the line or a semicolon.  */
3174                   arg = lr_token (ldfile, charmap, result, repertoire,
3175                                   verbose);
3176                   if (arg->tok == tok_eol)
3177                     {
3178                       uint32_t cnt;
3179
3180                       /* This means we have exactly one rule: `forward'.  */
3181                       if (nrules > 1)
3182                         lr_error (ldfile, _("\
3183 %s: invalid number of sorting rules"),
3184                                   "LC_COLLATE");
3185                       else
3186                         nrules = 1;
3187                       sp->rules = obstack_alloc (&collate->mempool,
3188                                                  (sizeof (enum coll_sort_rule)
3189                                                   * nrules));
3190                       for (cnt = 0; cnt < nrules; ++cnt)
3191                         sp->rules[cnt] = sort_forward;
3192
3193                       /* Next line.  */
3194                       break;
3195                     }
3196
3197                   /* Get the next token.  */
3198                   arg = lr_token (ldfile, charmap, result, repertoire,
3199                                   verbose);
3200                 }
3201             }
3202           else
3203             {
3204               /* There is no section symbol.  Therefore we use the unnamed
3205                  section.  */
3206               collate->current_section = &collate->unnamed_section;
3207
3208               if (collate->unnamed_section.first != NULL)
3209                 lr_error (ldfile, _("\
3210 %s: multiple order definitions for unnamed section"),
3211                           "LC_COLLATE");
3212               else
3213                 {
3214                   /* Insert &collate->unnamed_section at the beginning of
3215                      the collate->sections list.  */
3216                   collate->unnamed_section.next = collate->sections;
3217                   collate->sections = &collate->unnamed_section;
3218                 }
3219             }
3220
3221           /* Now read the direction names.  */
3222           read_directions (ldfile, arg, charmap, repertoire, result);
3223
3224           /* From now we need the strings untranslated.  */
3225           ldfile->translate_strings = 0;
3226           break;
3227
3228         case tok_order_end:
3229           /* Ignore the rest of the line if we don't need the input of
3230              this line.  */
3231           if (ignore_content)
3232             {
3233               lr_ignore_rest (ldfile, 0);
3234               break;
3235             }
3236
3237           if (state != 1)
3238             goto err_label;
3239
3240           /* Handle ellipsis at end of list.  */
3241           if (was_ellipsis != tok_none)
3242             {
3243               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3244                                repertoire, result);
3245               was_ellipsis = tok_none;
3246             }
3247
3248           state = 2;
3249           lr_ignore_rest (ldfile, 1);
3250           break;
3251
3252         case tok_reorder_after:
3253           /* Ignore the rest of the line if we don't need the input of
3254              this line.  */
3255           if (ignore_content)
3256             {
3257               lr_ignore_rest (ldfile, 0);
3258               break;
3259             }
3260
3261           if (state == 1)
3262             {
3263               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3264                         "LC_COLLATE");
3265               state = 2;
3266
3267               /* Handle ellipsis at end of list.  */
3268               if (was_ellipsis != tok_none)
3269                 {
3270                   handle_ellipsis (ldfile, arg->val.str.startmb,
3271                                    arg->val.str.lenmb, was_ellipsis, charmap,
3272                                    repertoire, result);
3273                   was_ellipsis = tok_none;
3274                 }
3275             }
3276           else if (state != 2 && state != 3)
3277             goto err_label;
3278           state = 3;
3279
3280           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3281           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3282             {
3283               /* Find this symbol in the sequence table.  */
3284               char ucsbuf[10];
3285               char *startmb;
3286               size_t lenmb;
3287               struct element_t *insp;
3288               int no_error = 1;
3289
3290               if (arg->tok == tok_bsymbol)
3291                 {
3292                   startmb = arg->val.str.startmb;
3293                   lenmb = arg->val.str.lenmb;
3294                 }
3295               else
3296                 {
3297                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3298                   startmb = ucsbuf;
3299                   lenmb = 9;
3300                 }
3301
3302               if (find_entry (&collate->seq_table, startmb, lenmb,
3303                               (void **) &insp) == 0)
3304                 /* Yes, the symbol exists.  Simply point the cursor
3305                    to it.  */
3306                 collate->cursor = insp;
3307               else
3308                 {
3309                   struct symbol_t *symbp;
3310
3311                   if (find_entry (&collate->sym_table, startmb, lenmb,
3312                                   (void **) &symbp) == 0)
3313                     {
3314                       if (symbp->order->last != NULL
3315                           || symbp->order->next != NULL)
3316                         collate->cursor = symbp->order;
3317                       else
3318                         {
3319                           /* This is a collating symbol but its position
3320                              is not yet defined.  */
3321                           lr_error (ldfile, _("\
3322 %s: order for collating symbol %.*s not yet defined"),
3323                                     "LC_COLLATE", (int) lenmb, startmb);
3324                           collate->cursor = NULL;
3325                           no_error = 0;
3326                         }
3327                     }
3328                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3329                                        (void **) &insp) == 0)
3330                     {
3331                       if (insp->last != NULL || insp->next != NULL)
3332                         collate->cursor = insp;
3333                       else
3334                         {
3335                           /* This is a collating element but its position
3336                              is not yet defined.  */
3337                           lr_error (ldfile, _("\
3338 %s: order for collating element %.*s not yet defined"),
3339                                     "LC_COLLATE", (int) lenmb, startmb);
3340                           collate->cursor = NULL;
3341                           no_error = 0;
3342                         }
3343                     }
3344                   else
3345                     {
3346                       /* This is bad.  The symbol after which we have to
3347                          insert does not exist.  */
3348                       lr_error (ldfile, _("\
3349 %s: cannot reorder after %.*s: symbol not known"),
3350                                 "LC_COLLATE", (int) lenmb, startmb);
3351                       collate->cursor = NULL;
3352                       no_error = 0;
3353                     }
3354                 }
3355
3356               lr_ignore_rest (ldfile, no_error);
3357             }
3358           else
3359             /* This must not happen.  */
3360             goto err_label;
3361           break;
3362
3363         case tok_reorder_end:
3364           /* Ignore the rest of the line if we don't need the input of
3365              this line.  */
3366           if (ignore_content)
3367             break;
3368
3369           if (state != 3)
3370             goto err_label;
3371           state = 4;
3372           lr_ignore_rest (ldfile, 1);
3373           break;
3374
3375         case tok_reorder_sections_after:
3376           /* Ignore the rest of the line if we don't need the input of
3377              this line.  */
3378           if (ignore_content)
3379             {
3380               lr_ignore_rest (ldfile, 0);
3381               break;
3382             }
3383
3384           if (state == 1)
3385             {
3386               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3387                         "LC_COLLATE");
3388               state = 2;
3389
3390               /* Handle ellipsis at end of list.  */
3391               if (was_ellipsis != tok_none)
3392                 {
3393                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3394                                    repertoire, result);
3395                   was_ellipsis = tok_none;
3396                 }
3397             }
3398           else if (state == 3)
3399             {
3400               WITH_CUR_LOCALE (error (0, 0, _("\
3401 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3402               state = 4;
3403             }
3404           else if (state != 2 && state != 4)
3405             goto err_label;
3406           state = 5;
3407
3408           /* Get the name of the sections we are adding after.  */
3409           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3410           if (arg->tok == tok_bsymbol)
3411             {
3412               /* Now find a section with this name.  */
3413               struct section_list *runp = collate->sections;
3414
3415               while (runp != NULL)
3416                 {
3417                   if (runp->name != NULL
3418                       && strlen (runp->name) == arg->val.str.lenmb
3419                       && memcmp (runp->name, arg->val.str.startmb,
3420                                  arg->val.str.lenmb) == 0)
3421                     break;
3422
3423                   runp = runp->next;
3424                 }
3425
3426               if (runp != NULL)
3427                 collate->current_section = runp;
3428               else
3429                 {
3430                   /* This is bad.  The section after which we have to
3431                      reorder does not exist.  Therefore we cannot
3432                      process the whole rest of this reorder
3433                      specification.  */
3434                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3435                             "LC_COLLATE", (int) arg->val.str.lenmb,
3436                             arg->val.str.startmb);
3437
3438                   do
3439                     {
3440                       lr_ignore_rest (ldfile, 0);
3441
3442                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3443                     }
3444                   while (now->tok == tok_reorder_sections_after
3445                          || now->tok == tok_reorder_sections_end
3446                          || now->tok == tok_end);
3447
3448                   /* Process the token we just saw.  */
3449                   nowtok = now->tok;
3450                   continue;
3451                 }
3452             }
3453           else
3454             /* This must not happen.  */
3455             goto err_label;
3456           break;
3457
3458         case tok_reorder_sections_end:
3459           /* Ignore the rest of the line if we don't need the input of
3460              this line.  */
3461           if (ignore_content)
3462             break;
3463
3464           if (state != 5)
3465             goto err_label;
3466           state = 6;
3467           lr_ignore_rest (ldfile, 1);
3468           break;
3469
3470         case tok_bsymbol:
3471         case tok_ucs4:
3472           /* Ignore the rest of the line if we don't need the input of
3473              this line.  */
3474           if (ignore_content)
3475             {
3476               lr_ignore_rest (ldfile, 0);
3477               break;
3478             }
3479
3480           if (state != 0 && state != 1 && state != 3 && state != 5)
3481             goto err_label;
3482
3483           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3484             goto err_label;
3485
3486           if (nowtok == tok_ucs4)
3487             {
3488               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3489               symstr = ucs4buf;
3490               symlen = 9;
3491             }
3492           else
3493             {
3494               symstr = arg->val.str.startmb;
3495               symlen = arg->val.str.lenmb;
3496             }
3497
3498           if (state == 0)
3499             {
3500               /* We are outside an `order_start' region.  This means
3501                  we must only accept definitions of values for
3502                  collation symbols since these are purely abstract
3503                  values and don't need directions associated.  */
3504               struct element_t *seqp;
3505
3506               if (find_entry (&collate->seq_table, symstr, symlen,
3507                               (void **) &seqp) == 0)
3508                 {
3509                   /* It's already defined.  First check whether this
3510                      is really a collating symbol.  */
3511                   if (seqp->is_character)
3512                     goto err_label;
3513
3514                   goto move_entry;
3515                 }
3516               else
3517                 {
3518                   void *result;
3519
3520                   if (find_entry (&collate->sym_table, symstr, symlen,
3521                                   &result) != 0)
3522                     /* No collating symbol, it's an error.  */
3523                     goto err_label;
3524
3525                   /* Maybe this is the first time we define a symbol
3526                      value and it is before the first actual section.  */
3527                   if (collate->sections == NULL)
3528                     collate->sections = collate->current_section =
3529                       &collate->symbol_section;
3530                 }
3531
3532               if (was_ellipsis != tok_none)
3533                 {
3534
3535                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3536                                    charmap, repertoire, result);
3537
3538                   /* Remember that we processed the ellipsis.  */
3539                   was_ellipsis = tok_none;
3540
3541                   /* And don't add the value a second time.  */
3542                   break;
3543                 }
3544             }
3545           else if (state == 3)
3546             {
3547               /* It is possible that we already have this collation sequence.
3548                  In this case we move the entry.  */
3549               struct element_t *seqp;
3550               void *sym;
3551
3552               /* If the symbol after which we have to insert was not found
3553                  ignore all entries.  */
3554               if (collate->cursor == NULL)
3555                 {
3556                   lr_ignore_rest (ldfile, 0);
3557                   break;
3558                 }
3559
3560               if (find_entry (&collate->seq_table, symstr, symlen,
3561                               (void **) &seqp) == 0)
3562                 goto move_entry;
3563
3564               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3565                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3566                 goto move_entry;
3567
3568               if (find_entry (&collate->elem_table, symstr, symlen,
3569                               (void **) &seqp) == 0
3570                   && (seqp->last != NULL || seqp->next != NULL
3571                       || (collate->start != NULL && seqp == collate->start)))
3572                 {
3573                 move_entry:
3574                   /* Remove the entry from the old position.  */
3575                   if (seqp->last == NULL)
3576                     collate->start = seqp->next;
3577                   else
3578                     seqp->last->next = seqp->next;
3579                   if (seqp->next != NULL)
3580                     seqp->next->last = seqp->last;
3581
3582                   /* We also have to check whether this entry is the
3583                      first or last of a section.  */
3584                   if (seqp->section->first == seqp)
3585                     {
3586                       if (seqp->section->first == seqp->section->last)
3587                         /* This section has no content anymore.  */
3588                         seqp->section->first = seqp->section->last = NULL;
3589                       else
3590                         seqp->section->first = seqp->next;
3591                     }
3592                   else if (seqp->section->last == seqp)
3593                     seqp->section->last = seqp->last;
3594
3595                   /* Now insert it in the new place.  */
3596                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3597                                   tok_none);
3598                   break;
3599                 }
3600
3601               /* Otherwise we just add a new entry.  */
3602             }
3603           else if (state == 5)
3604             {
3605               /* We are reordering sections.  Find the named section.  */
3606               struct section_list *runp = collate->sections;
3607               struct section_list *prevp = NULL;
3608
3609               while (runp != NULL)
3610                 {
3611                   if (runp->name != NULL
3612                       && strlen (runp->name) == symlen
3613                       && memcmp (runp->name, symstr, symlen) == 0)
3614                     break;
3615
3616                   prevp = runp;
3617                   runp = runp->next;
3618                 }
3619
3620               if (runp == NULL)
3621                 {
3622                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3623                             "LC_COLLATE", (int) symlen, symstr);
3624                   lr_ignore_rest (ldfile, 0);
3625                 }
3626               else
3627                 {
3628                   if (runp != collate->current_section)
3629                     {
3630                       /* Remove the named section from the old place and
3631                          insert it in the new one.  */
3632                       prevp->next = runp->next;
3633
3634                       runp->next = collate->current_section->next;
3635                       collate->current_section->next = runp;
3636                       collate->current_section = runp;
3637                     }
3638
3639                   /* Process the rest of the line which might change
3640                      the collation rules.  */
3641                   arg = lr_token (ldfile, charmap, result, repertoire,
3642                                   verbose);
3643                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3644                     read_directions (ldfile, arg, charmap, repertoire,
3645                                      result);
3646                 }
3647               break;
3648             }
3649           else if (was_ellipsis != tok_none)
3650             {
3651               /* Using the information in the `ellipsis_weight'
3652                  element and this and the last value we have to handle
3653                  the ellipsis now.  */
3654               assert (state == 1);
3655
3656               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3657                                repertoire, result);
3658
3659               /* Remember that we processed the ellipsis.  */
3660               was_ellipsis = tok_none;
3661
3662               /* And don't add the value a second time.  */
3663               break;
3664             }
3665
3666           /* Now insert in the new place.  */
3667           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3668           break;
3669
3670         case tok_undefined:
3671           /* Ignore the rest of the line if we don't need the input of
3672              this line.  */
3673           if (ignore_content)
3674             {
3675               lr_ignore_rest (ldfile, 0);
3676               break;
3677             }
3678
3679           if (state != 1)
3680             goto err_label;
3681
3682           if (was_ellipsis != tok_none)
3683             {
3684               lr_error (ldfile,
3685                         _("%s: cannot have `%s' as end of ellipsis range"),
3686                         "LC_COLLATE", "UNDEFINED");
3687
3688               unlink_element (collate);
3689               was_ellipsis = tok_none;
3690             }
3691
3692           /* See whether UNDEFINED already appeared somewhere.  */
3693           if (collate->undefined.next != NULL
3694               || &collate->undefined == collate->cursor)
3695             {
3696               lr_error (ldfile,
3697                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3698                         "LC_COLLATE", 9, "UNDEFINED",
3699                         collate->undefined.file,
3700                         collate->undefined.line);
3701               lr_ignore_rest (ldfile, 0);
3702             }
3703           else
3704             /* Parse the weights.  */
3705              insert_weights (ldfile, &collate->undefined, charmap,
3706                              repertoire, result, tok_none);
3707           break;
3708
3709         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3710         case tok_ellipsis3: /* absolute ellipsis */
3711         case tok_ellipsis4: /* symbolic decimal ellipsis */
3712           /* This is the symbolic (decimal or hexadecimal) or absolute
3713              ellipsis.  */
3714           if (was_ellipsis != tok_none)
3715             goto err_label;
3716
3717           if (state != 0 && state != 1 && state != 3)
3718             goto err_label;
3719
3720           was_ellipsis = nowtok;
3721
3722           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3723                           repertoire, result, nowtok);
3724           break;
3725
3726         case tok_end:
3727           /* Next we assume `LC_COLLATE'.  */
3728           if (!ignore_content)
3729             {
3730               if (state == 0)
3731                 /* We must either see a copy statement or have
3732                    ordering values.  */
3733                 lr_error (ldfile,
3734                           _("%s: empty category description not allowed"),
3735                           "LC_COLLATE");
3736               else if (state == 1)
3737                 {
3738                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3739                             "LC_COLLATE");
3740
3741                   /* Handle ellipsis at end of list.  */
3742                   if (was_ellipsis != tok_none)
3743                     {
3744                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3745                                        repertoire, result);
3746                       was_ellipsis = tok_none;
3747                     }
3748                 }
3749               else if (state == 3)
3750                 WITH_CUR_LOCALE (error (0, 0, _("\
3751 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3752               else if (state == 5)
3753                 WITH_CUR_LOCALE (error (0, 0, _("\
3754 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3755             }
3756           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3757           if (arg->tok == tok_eof)
3758             break;
3759           if (arg->tok == tok_eol)
3760             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3761           else if (arg->tok != tok_lc_collate)
3762             lr_error (ldfile, _("\
3763 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3764           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3765           return;
3766
3767         default:
3768         err_label:
3769           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3770         }
3771
3772       /* Prepare for the next round.  */
3773       now = lr_token (ldfile, charmap, result, NULL, verbose);
3774       nowtok = now->tok;
3775     }
3776
3777   /* When we come here we reached the end of the file.  */
3778   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3779 }