locale/programs/ld-collate.c

   1 /* Copyright (C) 1995-2003, 2005-2008, 2009 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <errno.h>
  24 #include <error.h>
  25 #include <stdlib.h>
  26 #include <wchar.h>
  27 #include <sys/param.h>
  28
  29 #include "localedef.h"
  30 #include "charmap.h"
  31 #include "localeinfo.h"
  32 #include "linereader.h"
  33 #include "locfile.h"
  34 #include "elem-hash.h"
  35
  36 /* Uncomment the following line in the production version.  */
  37 /* #define NDEBUG 1 */
  38 #include <assert.h>
  39
  40 #define obstack_chunk_alloc malloc
  41 #define obstack_chunk_free free
  42
  43 static inline void
  44 __attribute ((always_inline))
  45 obstack_int32_grow (struct obstack *obstack, int32_t data)
  46 {
  47   if (sizeof (int32_t) == sizeof (int))
  48     obstack_int_grow (obstack, data);
  49   else
  50     obstack_grow (obstack, &data, sizeof (int32_t));
  51 }
  52
  53 static inline void
  54 __attribute ((always_inline))
  55 obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
  56 {
  57   if (sizeof (int32_t) == sizeof (int))
  58     obstack_int_grow_fast (obstack, data);
  59   else
  60     obstack_grow (obstack, &data, sizeof (int32_t));
  61 }
  62
  63 /* Forward declaration.  */
  64 struct element_t;
  65
  66 /* Data type for list of strings.  */
  67 struct section_list
  68 {
  69   /* Successor in the known_sections list.  */
  70   struct section_list *def_next;
  71   /* Successor in the sections list.  */
  72   struct section_list *next;
  73   /* Name of the section.  */
  74   const char *name;
  75   /* First element of this section.  */
  76   struct element_t *first;
  77   /* Last element of this section.  */
  78   struct element_t *last;
  79   /* These are the rules for this section.  */
  80   enum coll_sort_rule *rules;
  81   /* Index of the rule set in the appropriate section of the output file.  */
  82   int ruleidx;
  83 };
  84
  85 struct element_t;
  86
  87 struct element_list_t
  88 {
  89   /* Number of elements.  */
  90   int cnt;
  91
  92   struct element_t **w;
  93 };
  94
  95 /* Data type for collating element.  */
  96 struct element_t
  97 {
  98   const char *name;
  99
 100   const char *mbs;
 101   size_t nmbs;
 102   const uint32_t *wcs;
 103   size_t nwcs;
 104   int *mborder;
 105   int wcorder;
 106
 107   /* The following is a bit mask which bits are set if this element is
 108      used in the appropriate level.  Interesting for the singlebyte
 109      weight computation.
 110
 111      XXX The type here restricts the number of levels to 32.  It could
 112      be changed if necessary but I doubt this is necessary.  */
 113   unsigned int used_in_level;
 114
 115   struct element_list_t *weights;
 116
 117   /* Nonzero if this is a real character definition.  */
 118   int is_character;
 119
 120   /* Order of the character in the sequence.  This information will
 121      be used in range expressions.  */
 122   int mbseqorder;
 123   int wcseqorder;
 124
 125   /* Where does the definition come from.  */
 126   const char *file;
 127   size_t line;
 128
 129   /* Which section does this belong to.  */
 130   struct section_list *section;
 131
 132   /* Predecessor and successor in the order list.  */
 133   struct element_t *last;
 134   struct element_t *next;
 135
 136   /* Next element in multibyte output list.  */
 137   struct element_t *mbnext;
 138   struct element_t *mblast;
 139
 140   /* Next element in wide character output list.  */
 141   struct element_t *wcnext;
 142   struct element_t *wclast;
 143 };
 144
 145 /* Special element value.  */
 146 #define ELEMENT_ELLIPSIS2       ((struct element_t *) 1)
 147 #define ELEMENT_ELLIPSIS3       ((struct element_t *) 2)
 148 #define ELEMENT_ELLIPSIS4       ((struct element_t *) 3)
 149
 150 /* Data type for collating symbol.  */
 151 struct symbol_t
 152 {
 153   const char *name;
 154
 155   /* Point to place in the order list.  */
 156   struct element_t *order;
 157
 158   /* Where does the definition come from.  */
 159   const char *file;
 160   size_t line;
 161 };
 162
 163 /* Sparse table of struct element_t *.  */
 164 #define TABLE wchead_table
 165 #define ELEMENT struct element_t *
 166 #define DEFAULT NULL
 167 #define ITERATE
 168 #define NO_FINALIZE
 169 #include "3level.h"
 170
 171 /* Sparse table of int32_t.  */
 172 #define TABLE collidx_table
 173 #define ELEMENT int32_t
 174 #define DEFAULT 0
 175 #include "3level.h"
 176
 177 /* Sparse table of uint32_t.  */
 178 #define TABLE collseq_table
 179 #define ELEMENT uint32_t
 180 #define DEFAULT ~((uint32_t) 0)
 181 #include "3level.h"
 182
 183
 184 /* Simple name list for the preprocessor.  */
 185 struct name_list
 186 {
 187   struct name_list *next;
 188   char str[0];
 189 };
 190
 191
 192 /* The real definition of the struct for the LC_COLLATE locale.  */
 193 struct locale_collate_t
 194 {
 195   int col_weight_max;
 196   int cur_weight_max;
 197
 198   /* List of known scripts.  */
 199   struct section_list *known_sections;
 200   /* List of used sections.  */
 201   struct section_list *sections;
 202   /* Current section using definition.  */
 203   struct section_list *current_section;
 204   /* There always can be an unnamed section.  */
 205   struct section_list unnamed_section;
 206   /* Flag whether the unnamed section has been defined.  */
 207   bool unnamed_section_defined;
 208   /* To make handling of errors easier we have another section.  */
 209   struct section_list error_section;
 210   /* Sometimes we are defining the values for collating symbols before
 211      the first actual section.  */
 212   struct section_list symbol_section;
 213
 214   /* Start of the order list.  */
 215   struct element_t *start;
 216
 217   /* The undefined element.  */
 218   struct element_t undefined;
 219
 220   /* This is the cursor for `reorder_after' insertions.  */
 221   struct element_t *cursor;
 222
 223   /* This value is used when handling ellipsis.  */
 224   struct element_t ellipsis_weight;
 225
 226   /* Known collating elements.  */
 227   hash_table elem_table;
 228
 229   /* Known collating symbols.  */
 230   hash_table sym_table;
 231
 232   /* Known collation sequences.  */
 233   hash_table seq_table;
 234
 235   struct obstack mempool;
 236
 237   /* The LC_COLLATE category is a bit special as it is sometimes possible
 238      that the definitions from more than one input file contains information.
 239      Therefore we keep all relevant input in a list.  */
 240   struct locale_collate_t *next;
 241
 242   /* Arrays with heads of the list for each of the leading bytes in
 243      the multibyte sequences.  */
 244   struct element_t *mbheads[256];
 245
 246   /* Arrays with heads of the list for each of the leading bytes in
 247      the multibyte sequences.  */
 248   struct wchead_table wcheads;
 249
 250   /* The arrays with the collation sequence order.  */
 251   unsigned char mbseqorder[256];
 252   struct collseq_table wcseqorder;
 253
 254   /* State of the preprocessor.  */
 255   enum
 256     {
 257       else_none = 0,
 258       else_ignore,
 259       else_seen
 260     }
 261     else_action;
 262 };
 263
 264
 265 /* We have a few global variables which are used for reading all
 266    LC_COLLATE category descriptions in all files.  */
 267 static uint32_t nrules;
 268
 269 /* List of defined preprocessor symbols.  */
 270 static struct name_list *defined;
 271
 272
 273 /* We need UTF-8 encoding of numbers.  */
 274 static inline int
 275 __attribute ((always_inline))
 276 utf8_encode (char *buf, int val)
 277 {
 278   int retval;
 279
 280   if (val < 0x80)
 281     {
 282       *buf++ = (char) val;
 283       retval = 1;
 284     }
 285   else
 286     {
 287       int step;
 288
 289       for (step = 2; step < 6; ++step)
 290         if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
 291           break;
 292       retval = step;
 293
 294       *buf = (unsigned char) (~0xff >> step);
 295       --step;
 296       do
 297         {
 298           buf[step] = 0x80 | (val & 0x3f);
 299           val >>= 6;
 300         }
 301       while (--step > 0);
 302       *buf |= val;
 303     }
 304
 305   return retval;
 306 }
 307
 308
 309 static struct section_list *
 310 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 311                    struct section_list *next)
 312 {
 313   struct section_list *newp;
 314
 315   newp = (struct section_list *) obstack_alloc (&collate->mempool,
 316                                                 sizeof (*newp));
 317   newp->next = next;
 318   newp->name = string;
 319   newp->first = NULL;
 320   newp->last = NULL;
 321
 322   return newp;
 323 }
 324
 325
 326 static struct element_t *
 327 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 328              const uint32_t *wcs, const char *name, size_t namelen,
 329              int is_character)
 330 {
 331   struct element_t *newp;
 332
 333   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 334                                              sizeof (*newp));
 335   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 336                                                     name, namelen);
 337   if (mbs != NULL)
 338     {
 339       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
 340       newp->nmbs = mbslen;
 341     }
 342   else
 343     {
 344       newp->mbs = NULL;
 345       newp->nmbs = 0;
 346     }
 347   if (wcs != NULL)
 348     {
 349       size_t nwcs = wcslen ((wchar_t *) wcs);
 350       uint32_t zero = 0;
 351       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
 352       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
 353       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
 354       newp->nwcs = nwcs;
 355     }
 356   else
 357     {
 358       newp->wcs = NULL;
 359       newp->nwcs = 0;
 360     }
 361   newp->mborder = NULL;
 362   newp->wcorder = 0;
 363   newp->used_in_level = 0;
 364   newp->is_character = is_character;
 365
 366   /* Will be assigned later.  XXX  */
 367   newp->mbseqorder = 0;
 368   newp->wcseqorder = 0;
 369
 370   /* Will be allocated later.  */
 371   newp->weights = NULL;
 372
 373   newp->file = NULL;
 374   newp->line = 0;
 375
 376   newp->section = collate->current_section;
 377
 378   newp->last = NULL;
 379   newp->next = NULL;
 380
 381   newp->mbnext = NULL;
 382   newp->mblast = NULL;
 383
 384   newp->wcnext = NULL;
 385   newp->wclast = NULL;
 386
 387   return newp;
 388 }
 389
 390
 391 static struct symbol_t *
 392 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
 393 {
 394   struct symbol_t *newp;
 395
 396   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
 397
 398   newp->name = obstack_copy0 (&collate->mempool, name, len);
 399   newp->order = NULL;
 400
 401   newp->file = NULL;
 402   newp->line = 0;
 403
 404   return newp;
 405 }
 406
 407
 408 /* Test whether this name is already defined somewhere.  */
 409 static int
 410 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
 411                  const struct charmap_t *charmap,
 412                  struct repertoire_t *repertoire, const char *symbol,
 413                  size_t symbol_len)
 414 {
 415   void *ignore = NULL;
 416
 417   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
 418     {
 419       lr_error (ldfile, _("`%.*s' already defined in charmap"),
 420                 (int) symbol_len, symbol);
 421       return 1;
 422     }
 423
 424   if (repertoire != NULL
 425       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
 426           == 0))
 427     {
 428       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
 429                 (int) symbol_len, symbol);
 430       return 1;
 431     }
 432
 433   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
 434     {
 435       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
 436                 (int) symbol_len, symbol);
 437       return 1;
 438     }
 439
 440   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
 441     {
 442       lr_error (ldfile, _("`%.*s' already defined as collating element"),
 443                 (int) symbol_len, symbol);
 444       return 1;
 445     }
 446
 447   return 0;
 448 }
 449
 450
 451 /* Read the direction specification.  */
 452 static void
 453 read_directions (struct linereader *ldfile, struct token *arg,
 454                  const struct charmap_t *charmap,
 455                  struct repertoire_t *repertoire, struct localedef_t *result)
 456 {
 457   int cnt = 0;
 458   int max = nrules ?: 10;
 459   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
 460   int warned = 0;
 461   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 462
 463   while (1)
 464     {
 465       int valid = 0;
 466
 467       if (arg->tok == tok_forward)
 468         {
 469           if (rules[cnt] & sort_backward)
 470             {
 471               if (! warned)
 472                 {
 473                   lr_error (ldfile, _("\
 474 %s: `forward' and `backward' are mutually excluding each other"),
 475                             "LC_COLLATE");
 476                   warned = 1;
 477                 }
 478             }
 479           else if (rules[cnt] & sort_forward)
 480             {
 481               if (! warned)
 482                 {
 483                   lr_error (ldfile, _("\
 484 %s: `%s' mentioned more than once in definition of weight %d"),
 485                             "LC_COLLATE", "forward", cnt + 1);
 486                 }
 487             }
 488           else
 489             rules[cnt] |= sort_forward;
 490
 491           valid = 1;
 492         }
 493       else if (arg->tok == tok_backward)
 494         {
 495           if (rules[cnt] & sort_forward)
 496             {
 497               if (! warned)
 498                 {
 499                   lr_error (ldfile, _("\
 500 %s: `forward' and `backward' are mutually excluding each other"),
 501                             "LC_COLLATE");
 502                   warned = 1;
 503                 }
 504             }
 505           else if (rules[cnt] & sort_backward)
 506             {
 507               if (! warned)
 508                 {
 509                   lr_error (ldfile, _("\
 510 %s: `%s' mentioned more than once in definition of weight %d"),
 511                             "LC_COLLATE", "backward", cnt + 1);
 512                 }
 513             }
 514           else
 515             rules[cnt] |= sort_backward;
 516
 517           valid = 1;
 518         }
 519       else if (arg->tok == tok_position)
 520         {
 521           if (rules[cnt] & sort_position)
 522             {
 523               if (! warned)
 524                 {
 525                   lr_error (ldfile, _("\
 526 %s: `%s' mentioned more than once in definition of weight %d"),
 527                             "LC_COLLATE", "position", cnt + 1);
 528                 }
 529             }
 530           else
 531             rules[cnt] |= sort_position;
 532
 533           valid = 1;
 534         }
 535
 536       if (valid)
 537         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 538
 539       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
 540           || arg->tok == tok_semicolon)
 541         {
 542           if (! valid && ! warned)
 543             {
 544               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 545               warned = 1;
 546             }
 547
 548           /* See whether we have to increment the counter.  */
 549           if (arg->tok != tok_comma && rules[cnt] != 0)
 550             {
 551               /* Add the default `forward' if we have seen only `position'.  */
 552               if (rules[cnt] == sort_position)
 553                 rules[cnt] = sort_position | sort_forward;
 554
 555               ++cnt;
 556             }
 557
 558           if (arg->tok == tok_eof || arg->tok == tok_eol)
 559             /* End of line or file, so we exit the loop.  */
 560             break;
 561
 562           if (nrules == 0)
 563             {
 564               /* See whether we have enough room in the array.  */
 565               if (cnt == max)
 566                 {
 567                   max += 10;
 568                   rules = (enum coll_sort_rule *) xrealloc (rules,
 569                                                             max
 570                                                             * sizeof (*rules));
 571                   memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
 572                 }
 573             }
 574           else
 575             {
 576               if (cnt == nrules)
 577                 {
 578                   /* There must not be any more rule.  */
 579                   if (! warned)
 580                     {
 581                       lr_error (ldfile, _("\
 582 %s: too many rules; first entry only had %d"),
 583                                 "LC_COLLATE", nrules);
 584                       warned = 1;
 585                     }
 586
 587                   lr_ignore_rest (ldfile, 0);
 588                   break;
 589                 }
 590             }
 591         }
 592       else
 593         {
 594           if (! warned)
 595             {
 596               lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 597               warned = 1;
 598             }
 599         }
 600
 601       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 602     }
 603
 604   if (nrules == 0)
 605     {
 606       /* Now we know how many rules we have.  */
 607       nrules = cnt;
 608       rules = (enum coll_sort_rule *) xrealloc (rules,
 609                                                 nrules * sizeof (*rules));
 610     }
 611   else
 612     {
 613       if (cnt < nrules)
 614         {
 615           /* Not enough rules in this specification.  */
 616           if (! warned)
 617             lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
 618
 619           do
 620             rules[cnt] = sort_forward;
 621           while (++cnt < nrules);
 622         }
 623     }
 624
 625   collate->current_section->rules = rules;
 626 }
 627
 628
 629 static struct element_t *
 630 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 631               const char *str, size_t len)
 632 {
 633   void *result = NULL;
 634
 635   /* Search for the entries among the collation sequences already define.  */
 636   if (find_entry (&collate->seq_table, str, len, &result) != 0)
 637     {
 638       /* Nope, not define yet.  So we see whether it is a
 639          collation symbol.  */
 640       void *ptr;
 641
 642       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
 643         {
 644           /* It's a collation symbol.  */
 645           struct symbol_t *sym = (struct symbol_t *) ptr;
 646           result = sym->order;
 647
 648           if (result == NULL)
 649             result = sym->order = new_element (collate, NULL, 0, NULL,
 650                                                NULL, 0, 0);
 651         }
 652       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
 653         {
 654           /* It's also no collation element.  So it is a character
 655              element defined later.  */
 656           result = new_element (collate, NULL, 0, NULL, str, len, 1);
 657           /* Insert it into the sequence table.  */
 658           insert_entry (&collate->seq_table, str, len, result);
 659         }
 660     }
 661
 662   return (struct element_t *) result;
 663 }
 664
 665
 666 static void
 667 unlink_element (struct locale_collate_t *collate)
 668 {
 669   if (collate->cursor == collate->start)
 670     {
 671       assert (collate->cursor->next == NULL);
 672       assert (collate->cursor->last == NULL);
 673       collate->cursor = NULL;
 674     }
 675   else
 676     {
 677       if (collate->cursor->next != NULL)
 678         collate->cursor->next->last = collate->cursor->last;
 679       if (collate->cursor->last != NULL)
 680         collate->cursor->last->next = collate->cursor->next;
 681       collate->cursor = collate->cursor->last;
 682     }
 683 }
 684
 685
 686 static void
 687 insert_weights (struct linereader *ldfile, struct element_t *elem,
 688                 const struct charmap_t *charmap,
 689                 struct repertoire_t *repertoire, struct localedef_t *result,
 690                 enum token_t ellipsis)
 691 {
 692   int weight_cnt;
 693   struct token *arg;
 694   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 695
 696   /* Initialize all the fields.  */
 697   elem->file = ldfile->fname;
 698   elem->line = ldfile->lineno;
 699
 700   elem->last = collate->cursor;
 701   elem->next = collate->cursor ? collate->cursor->next : NULL;
 702   if (collate->cursor != NULL && collate->cursor->next != NULL)
 703     collate->cursor->next->last = elem;
 704   if (collate->cursor != NULL)
 705     collate->cursor->next = elem;
 706   if (collate->start == NULL)
 707     {
 708       assert (collate->cursor == NULL);
 709       collate->start = elem;
 710     }
 711
 712   elem->section = collate->current_section;
 713
 714   if (collate->current_section->first == NULL)
 715     collate->current_section->first = elem;
 716   if (collate->current_section->last == collate->cursor)
 717     collate->current_section->last = elem;
 718
 719   collate->cursor = elem;
 720
 721   elem->weights = (struct element_list_t *)
 722     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
 723   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
 724
 725   weight_cnt = 0;
 726
 727   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 728   do
 729     {
 730       if (arg->tok == tok_eof || arg->tok == tok_eol)
 731         break;
 732
 733       if (arg->tok == tok_ignore)
 734         {
 735           /* The weight for this level has to be ignored.  We use the
 736              null pointer to indicate this.  */
 737           elem->weights[weight_cnt].w = (struct element_t **)
 738             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 739           elem->weights[weight_cnt].w[0] = NULL;
 740           elem->weights[weight_cnt].cnt = 1;
 741         }
 742       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 743         {
 744           char ucs4str[10];
 745           struct element_t *val;
 746           char *symstr;
 747           size_t symlen;
 748
 749           if (arg->tok == tok_bsymbol)
 750             {
 751               symstr = arg->val.str.startmb;
 752               symlen = arg->val.str.lenmb;
 753             }
 754           else
 755             {
 756               snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 757               symstr = ucs4str;
 758               symlen = 9;
 759             }
 760
 761           val = find_element (ldfile, collate, symstr, symlen);
 762           if (val == NULL)
 763             break;
 764
 765           elem->weights[weight_cnt].w = (struct element_t **)
 766             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 767           elem->weights[weight_cnt].w[0] = val;
 768           elem->weights[weight_cnt].cnt = 1;
 769         }
 770       else if (arg->tok == tok_string)
 771         {
 772           /* Split the string up in the individual characters and put
 773              the element definitions in the list.  */
 774           const char *cp = arg->val.str.startmb;
 775           int cnt = 0;
 776           struct element_t *charelem;
 777           struct element_t **weights = NULL;
 778           int max = 0;
 779
 780           if (*cp == '\0')
 781             {
 782               lr_error (ldfile, _("%s: empty weight string not allowed"),
 783                         "LC_COLLATE");
 784               lr_ignore_rest (ldfile, 0);
 785               break;
 786             }
 787
 788           do
 789             {
 790               if (*cp == '<')
 791                 {
 792                   /* Ahh, it's a bsymbol or an UCS4 value.  If it's
 793                      the latter we have to unify the name.  */
 794                   const char *startp = ++cp;
 795                   size_t len;
 796
 797                   while (*cp != '>')
 798                     {
 799                       if (*cp == ldfile->escape_char)
 800                         ++cp;
 801                       if (*cp == '\0')
 802                         /* It's a syntax error.  */
 803                         goto syntax;
 804
 805                       ++cp;
 806                     }
 807
 808                   if (cp - startp == 5 && startp[0] == 'U'
 809                       && isxdigit (startp[1]) && isxdigit (startp[2])
 810                       && isxdigit (startp[3]) && isxdigit (startp[4]))
 811                     {
 812                       unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
 813                       char *newstr;
 814
 815                       newstr = (char *) xmalloc (10);
 816                       snprintf (newstr, 10, "U%08X", ucs4);
 817                       startp = newstr;
 818
 819                       len = 9;
 820                     }
 821                   else
 822                     len = cp - startp;
 823
 824                   charelem = find_element (ldfile, collate, startp, len);
 825                   ++cp;
 826                 }
 827               else
 828                 {
 829                   /* People really shouldn't use characters directly in
 830                      the string.  Especially since it's not really clear
 831                      what this means.  We interpret all characters in the
 832                      string as if that would be bsymbols.  Otherwise we
 833                      would have to match back to bsymbols somehow and this
 834                      is normally not what people normally expect.  */
 835                   charelem = find_element (ldfile, collate, cp++, 1);
 836                 }
 837
 838               if (charelem == NULL)
 839                 {
 840                   /* We ignore the rest of the line.  */
 841                   lr_ignore_rest (ldfile, 0);
 842                   break;
 843                 }
 844
 845               /* Add the pointer.  */
 846               if (cnt >= max)
 847                 {
 848                   struct element_t **newp;
 849                   max += 10;
 850                   newp = (struct element_t **)
 851                     alloca (max * sizeof (struct element_t *));
 852                   memcpy (newp, weights, cnt * sizeof (struct element_t *));
 853                   weights = newp;
 854                 }
 855               weights[cnt++] = charelem;
 856             }
 857           while (*cp != '\0');
 858
 859           /* Now store the information.  */
 860           elem->weights[weight_cnt].w = (struct element_t **)
 861             obstack_alloc (&collate->mempool,
 862                            cnt * sizeof (struct element_t *));
 863           memcpy (elem->weights[weight_cnt].w, weights,
 864                   cnt * sizeof (struct element_t *));
 865           elem->weights[weight_cnt].cnt = cnt;
 866
 867           /* We don't need the string anymore.  */
 868           free (arg->val.str.startmb);
 869         }
 870       else if (ellipsis != tok_none
 871                && (arg->tok == tok_ellipsis2
 872                    || arg->tok == tok_ellipsis3
 873                    || arg->tok == tok_ellipsis4))
 874         {
 875           /* It must be the same ellipsis as used in the initial column.  */
 876           if (arg->tok != ellipsis)
 877             lr_error (ldfile, _("\
 878 %s: weights must use the same ellipsis symbol as the name"),
 879                       "LC_COLLATE");
 880
 881           /* The weight for this level will depend on the element
 882              iterating over the range.  Put a placeholder.  */
 883           elem->weights[weight_cnt].w = (struct element_t **)
 884             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 885           elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 886           elem->weights[weight_cnt].cnt = 1;
 887         }
 888       else
 889         {
 890         syntax:
 891           /* It's a syntax error.  */
 892           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 893           lr_ignore_rest (ldfile, 0);
 894           break;
 895         }
 896
 897       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 898       /* This better should be the end of the line or a semicolon.  */
 899       if (arg->tok == tok_semicolon)
 900         /* OK, ignore this and read the next token.  */
 901         arg = lr_token (ldfile, charmap, result, repertoire, verbose);
 902       else if (arg->tok != tok_eof && arg->tok != tok_eol)
 903         {
 904           /* It's a syntax error.  */
 905           lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
 906           lr_ignore_rest (ldfile, 0);
 907           break;
 908         }
 909     }
 910   while (++weight_cnt < nrules);
 911
 912   if (weight_cnt < nrules)
 913     {
 914       /* This means the rest of the line uses the current element as
 915          the weight.  */
 916       do
 917         {
 918           elem->weights[weight_cnt].w = (struct element_t **)
 919             obstack_alloc (&collate->mempool, sizeof (struct element_t *));
 920           if (ellipsis == tok_none)
 921             elem->weights[weight_cnt].w[0] = elem;
 922           else
 923             elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
 924           elem->weights[weight_cnt].cnt = 1;
 925         }
 926       while (++weight_cnt < nrules);
 927     }
 928   else
 929     {
 930       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
 931         {
 932           /* Too many rule values.  */
 933           lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
 934           lr_ignore_rest (ldfile, 0);
 935         }
 936       else
 937         lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
 938     }
 939 }
 940
 941
 942 static int
 943 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
 944               const struct charmap_t *charmap, struct repertoire_t *repertoire,
 945               struct localedef_t *result)
 946 {
 947   /* First find out what kind of symbol this is.  */
 948   struct charseq *seq;
 949   uint32_t wc;
 950   struct element_t *elem = NULL;
 951   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
 952
 953   /* Try to find the character in the charmap.  */
 954   seq = charmap_find_value (charmap, symstr, symlen);
 955
 956   /* Determine the wide character.  */
 957   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
 958     {
 959       wc = repertoire_find_value (repertoire, symstr, symlen);
 960       if (seq != NULL)
 961         seq->ucs4 = wc;
 962     }
 963   else
 964     wc = seq->ucs4;
 965
 966   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
 967     {
 968       /* It's no character, so look through the collation elements and
 969          symbol list.  */
 970       void *ptr = elem;
 971       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
 972         {
 973           void *result;
 974           struct symbol_t *sym = NULL;
 975
 976           /* It's also collation element.  Therefore it's either a
 977              collating symbol or it's a character which is not
 978              supported by the character set.  In the later case we
 979              simply create a dummy entry.  */
 980           if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
 981             {
 982               /* It's a collation symbol.  */
 983               sym = (struct symbol_t *) result;
 984
 985               elem = sym->order;
 986             }
 987
 988           if (elem == NULL)
 989             {
 990               elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
 991
 992               if (sym != NULL)
 993                 sym->order = elem;
 994               else
 995                 /* Enter a fake element in the sequence table.  This
 996                    won't cause anything in the output since there is
 997                    no multibyte or wide character associated with
 998                    it.  */
 999                 insert_entry (&collate->seq_table, symstr, symlen, elem);
1000             }
1001         }
1002       else
1003         /* Copy the result back.  */
1004         elem = ptr;
1005     }
1006   else
1007     {
1008       /* Otherwise the symbols stands for a character.  */
1009       void *ptr = elem;
1010       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1011         {
1012           uint32_t wcs[2] = { wc, 0 };
1013
1014           /* We have to allocate an entry.  */
1015           elem = new_element (collate,
1016                               seq != NULL ? (char *) seq->bytes : NULL,
1017                               seq != NULL ? seq->nbytes : 0,
1018                               wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1019                               symstr, symlen, 1);
1020
1021           /* And add it to the table.  */
1022           if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1023             /* This cannot happen.  */
1024             assert (! "Internal error");
1025         }
1026       else
1027         {
1028           /* Copy the result back.  */
1029           elem = ptr;
1030
1031           /* Maybe the character was used before the definition.  In this case
1032              we have to insert the byte sequences now.  */
1033           if (elem->mbs == NULL && seq != NULL)
1034             {
1035               elem->mbs = obstack_copy0 (&collate->mempool,
1036                                          seq->bytes, seq->nbytes);
1037               elem->nmbs = seq->nbytes;
1038             }
1039
1040           if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1041             {
1042               uint32_t wcs[2] = { wc, 0 };
1043
1044               elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1045               elem->nwcs = 1;
1046             }
1047         }
1048     }
1049
1050   /* Test whether this element is not already in the list.  */
1051   if (elem->next != NULL || elem == collate->cursor)
1052     {
1053       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1054                 (int) symlen, symstr, elem->file, elem->line);
1055       lr_ignore_rest (ldfile, 0);
1056       return 1;
1057     }
1058
1059   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1060
1061   return 0;
1062 }
1063
1064
1065 static void
1066 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1067                  enum token_t ellipsis, const struct charmap_t *charmap,
1068                  struct repertoire_t *repertoire,
1069                  struct localedef_t *result)
1070 {
1071   struct element_t *startp;
1072   struct element_t *endp;
1073   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1074
1075   /* Unlink the entry added for the ellipsis.  */
1076   unlink_element (collate);
1077   startp = collate->cursor;
1078
1079   /* Process and add the end-entry.  */
1080   if (symstr != NULL
1081       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1082     /* Something went wrong with inserting the to-value.  This means
1083        we cannot process the ellipsis.  */
1084     return;
1085
1086   /* Reset the cursor.  */
1087   collate->cursor = startp;
1088
1089   /* Now we have to handle many different situations:
1090      - we have to distinguish between the three different ellipsis forms
1091      - the is the ellipsis at the beginning, in the middle, or at the end.
1092   */
1093   endp = collate->cursor->next;
1094   assert (symstr == NULL || endp != NULL);
1095
1096   /* XXX The following is probably very wrong since also collating symbols
1097      can appear in ranges.  But do we want/can refine the test for that?  */
1098 #if 0
1099   /* Both, the start and the end symbol, must stand for characters.  */
1100   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1101       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1102     {
1103       lr_error (ldfile, _("\
1104 %s: the start and the end symbol of a range must stand for characters"),
1105                 "LC_COLLATE");
1106       return;
1107     }
1108 #endif
1109
1110   if (ellipsis == tok_ellipsis3)
1111     {
1112       /* One requirement we make here: the length of the byte
1113          sequences for the first and end character must be the same.
1114          This is mainly to prevent unwanted effects and this is often
1115          not what is wanted.  */
1116       size_t len = (startp->mbs != NULL ? startp->nmbs
1117                     : (endp->mbs != NULL ? endp->nmbs : 0));
1118       char mbcnt[len + 1];
1119       char mbend[len + 1];
1120
1121       /* Well, this should be caught somewhere else already.  Just to
1122          make sure.  */
1123       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1124       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1125
1126       if (startp != NULL && endp != NULL
1127           && startp->mbs != NULL && endp->mbs != NULL
1128           && startp->nmbs != endp->nmbs)
1129         {
1130           lr_error (ldfile, _("\
1131 %s: byte sequences of first and last character must have the same length"),
1132                     "LC_COLLATE");
1133           return;
1134         }
1135
1136       /* Determine whether we have to generate multibyte sequences.  */
1137       if ((startp == NULL || startp->mbs != NULL)
1138           && (endp == NULL || endp->mbs != NULL))
1139         {
1140           int cnt;
1141           int ret;
1142
1143           /* Prepare the beginning byte sequence.  This is either from the
1144              beginning byte sequence or it is all nulls if it was an
1145              initial ellipsis.  */
1146           if (startp == NULL || startp->mbs == NULL)
1147             memset (mbcnt, '\0', len);
1148           else
1149             {
1150               memcpy (mbcnt, startp->mbs, len);
1151
1152               /* And increment it so that the value is the first one we will
1153                  try to insert.  */
1154               for (cnt = len - 1; cnt >= 0; --cnt)
1155                 if (++mbcnt[cnt] != '\0')
1156                   break;
1157             }
1158           mbcnt[len] = '\0';
1159
1160           /* And the end sequence.  */
1161           if (endp == NULL || endp->mbs == NULL)
1162             memset (mbend, '\0', len);
1163           else
1164             memcpy (mbend, endp->mbs, len);
1165           mbend[len] = '\0';
1166
1167           /* Test whether we have a correct range.  */
1168           ret = memcmp (mbcnt, mbend, len);
1169           if (ret >= 0)
1170             {
1171               if (ret > 0)
1172                 lr_error (ldfile, _("%s: byte sequence of first character of \
1173 range is not lower than that of the last character"), "LC_COLLATE");
1174               return;
1175             }
1176
1177           /* Generate the byte sequences data.  */
1178           while (1)
1179             {
1180               struct charseq *seq;
1181
1182               /* Quite a bit of work ahead.  We have to find the character
1183                  definition for the byte sequence and then determine the
1184                  wide character belonging to it.  */
1185               seq = charmap_find_symbol (charmap, mbcnt, len);
1186               if (seq != NULL)
1187                 {
1188                   struct element_t *elem;
1189                   size_t namelen;
1190
1191                   /* I don't think this can ever happen.  */
1192                   assert (seq->name != NULL);
1193                   namelen = strlen (seq->name);
1194
1195                   if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1196                     seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1197                                                        namelen);
1198
1199                   /* Now we are ready to insert the new value in the
1200                      sequence.  Find out whether the element is
1201                      already known.  */
1202                   void *ptr;
1203                   if (find_entry (&collate->seq_table, seq->name, namelen,
1204                                   &ptr) != 0)
1205                     {
1206                       uint32_t wcs[2] = { seq->ucs4, 0 };
1207
1208                       /* We have to allocate an entry.  */
1209                       elem = new_element (collate, mbcnt, len,
1210                                           seq->ucs4 == ILLEGAL_CHAR_VALUE
1211                                           ? NULL : wcs, seq->name,
1212                                           namelen, 1);
1213
1214                       /* And add it to the table.  */
1215                       if (insert_entry (&collate->seq_table, seq->name,
1216                                         namelen, elem) != 0)
1217                         /* This cannot happen.  */
1218                         assert (! "Internal error");
1219                     }
1220                   else
1221                     /* Copy the result.  */
1222                     elem = ptr;
1223
1224                   /* Test whether this element is not already in the list.  */
1225                   if (elem->next != NULL || (collate->cursor != NULL
1226                                              && elem->next == collate->cursor))
1227                     {
1228                       lr_error (ldfile, _("\
1229 order for `%.*s' already defined at %s:%Zu"),
1230                                 (int) namelen, seq->name,
1231                                 elem->file, elem->line);
1232                       goto increment;
1233                     }
1234
1235                   /* Enqueue the new element.  */
1236                   elem->last = collate->cursor;
1237                   if (collate->cursor == NULL)
1238                     elem->next = NULL;
1239                   else
1240                     {
1241                       elem->next = collate->cursor->next;
1242                       elem->last->next = elem;
1243                       if (elem->next != NULL)
1244                         elem->next->last = elem;
1245                     }
1246                   if (collate->start == NULL)
1247                     {
1248                       assert (collate->cursor == NULL);
1249                       collate->start = elem;
1250                     }
1251                   collate->cursor = elem;
1252
1253                  /* Add the weight value.  We take them from the
1254                     `ellipsis_weights' member of `collate'.  */
1255                   elem->weights = (struct element_list_t *)
1256                     obstack_alloc (&collate->mempool,
1257                                    nrules * sizeof (struct element_list_t));
1258                   for (cnt = 0; cnt < nrules; ++cnt)
1259                     if (collate->ellipsis_weight.weights[cnt].cnt == 1
1260                         && (collate->ellipsis_weight.weights[cnt].w[0]
1261                             == ELEMENT_ELLIPSIS2))
1262                       {
1263                         elem->weights[cnt].w = (struct element_t **)
1264                           obstack_alloc (&collate->mempool,
1265                                          sizeof (struct element_t *));
1266                         elem->weights[cnt].w[0] = elem;
1267                         elem->weights[cnt].cnt = 1;
1268                       }
1269                     else
1270                       {
1271                         /* Simply use the weight from `ellipsis_weight'.  */
1272                         elem->weights[cnt].w =
1273                           collate->ellipsis_weight.weights[cnt].w;
1274                         elem->weights[cnt].cnt =
1275                           collate->ellipsis_weight.weights[cnt].cnt;
1276                       }
1277                 }
1278
1279               /* Increment for the next round.  */
1280             increment:
1281               for (cnt = len - 1; cnt >= 0; --cnt)
1282                 if (++mbcnt[cnt] != '\0')
1283                   break;
1284
1285               /* Find out whether this was all.  */
1286               if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1287                 /* Yep, that's all.  */
1288                 break;
1289             }
1290         }
1291     }
1292   else
1293     {
1294       /* For symbolic range we naturally must have a beginning and an
1295          end specified by the user.  */
1296       if (startp == NULL)
1297         lr_error (ldfile, _("\
1298 %s: symbolic range ellipsis must not directly follow `order_start'"),
1299                   "LC_COLLATE");
1300       else if (endp == NULL)
1301         lr_error (ldfile, _("\
1302 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1303                   "LC_COLLATE");
1304       else
1305         {
1306           /* Determine the range.  To do so we have to determine the
1307              common prefix of the both names and then the numeric
1308              values of both ends.  */
1309           size_t lenfrom = strlen (startp->name);
1310           size_t lento = strlen (endp->name);
1311           char buf[lento + 1];
1312           int preflen = 0;
1313           long int from;
1314           long int to;
1315           char *cp;
1316           int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1317
1318           if (lenfrom != lento)
1319             {
1320             invalid_range:
1321               lr_error (ldfile, _("\
1322 `%s' and `%.*s' are not valid names for symbolic range"),
1323                         startp->name, (int) lento, endp->name);
1324               return;
1325             }
1326
1327           while (startp->name[preflen] == endp->name[preflen])
1328             if (startp->name[preflen] == '\0')
1329               /* Nothing to be done.  The start and end point are identical
1330                  and while inserting the end point we have already given
1331                  the user an error message.  */
1332               return;
1333             else
1334               ++preflen;
1335
1336           errno = 0;
1337           from = strtol (startp->name + preflen, &cp, base);
1338           if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1339             goto invalid_range;
1340
1341           errno = 0;
1342           to = strtol (endp->name + preflen, &cp, base);
1343           if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1344             goto invalid_range;
1345
1346           /* Copy the prefix.  */
1347           memcpy (buf, startp->name, preflen);
1348
1349           /* Loop over all values.  */
1350           for (++from; from < to; ++from)
1351             {
1352               struct element_t *elem = NULL;
1353               struct charseq *seq;
1354               uint32_t wc;
1355               int cnt;
1356
1357               /* Generate the name.  */
1358               sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1359                        (int) (lenfrom - preflen), from);
1360
1361               /* Look whether this name is already defined.  */
1362               void *ptr;
1363               if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1364                 {
1365                   /* Copy back the result.  */
1366                   elem = ptr;
1367
1368                   if (elem->next != NULL || (collate->cursor != NULL
1369                                              && elem->next == collate->cursor))
1370                     {
1371                       lr_error (ldfile, _("\
1372 %s: order for `%.*s' already defined at %s:%Zu"),
1373                                 "LC_COLLATE", (int) lenfrom, buf,
1374                                 elem->file, elem->line);
1375                       continue;
1376                     }
1377
1378                   if (elem->name == NULL)
1379                     {
1380                       lr_error (ldfile, _("%s: `%s' must be a character"),
1381                                 "LC_COLLATE", buf);
1382                       continue;
1383                     }
1384                 }
1385
1386               if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1387                 {
1388                   /* Search for a character of this name.  */
1389                   seq = charmap_find_value (charmap, buf, lenfrom);
1390                   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1391                     {
1392                       wc = repertoire_find_value (repertoire, buf, lenfrom);
1393
1394                       if (seq != NULL)
1395                         seq->ucs4 = wc;
1396                     }
1397                   else
1398                     wc = seq->ucs4;
1399
1400                   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1401                     /* We don't know anything about a character with this
1402                        name.  XXX Should we warn?  */
1403                     continue;
1404
1405                   if (elem == NULL)
1406                     {
1407                       uint32_t wcs[2] = { wc, 0 };
1408
1409                       /* We have to allocate an entry.  */
1410                       elem = new_element (collate,
1411                                           seq != NULL
1412                                           ? (char *) seq->bytes : NULL,
1413                                           seq != NULL ? seq->nbytes : 0,
1414                                           wc == ILLEGAL_CHAR_VALUE
1415                                           ? NULL : wcs, buf, lenfrom, 1);
1416                     }
1417                   else
1418                     {
1419                       /* Update the element.  */
1420                       if (seq != NULL)
1421                         {
1422                           elem->mbs = obstack_copy0 (&collate->mempool,
1423                                                      seq->bytes, seq->nbytes);
1424                           elem->nmbs = seq->nbytes;
1425                         }
1426
1427                       if (wc != ILLEGAL_CHAR_VALUE)
1428                         {
1429                           uint32_t zero = 0;
1430
1431                           obstack_grow (&collate->mempool,
1432                                         &wc, sizeof (uint32_t));
1433                           obstack_grow (&collate->mempool,
1434                                         &zero, sizeof (uint32_t));
1435                           elem->wcs = obstack_finish (&collate->mempool);
1436                           elem->nwcs = 1;
1437                         }
1438                     }
1439
1440                   elem->file = ldfile->fname;
1441                   elem->line = ldfile->lineno;
1442                   elem->section = collate->current_section;
1443                 }
1444
1445               /* Enqueue the new element.  */
1446               elem->last = collate->cursor;
1447               elem->next = collate->cursor->next;
1448               elem->last->next = elem;
1449               if (elem->next != NULL)
1450                 elem->next->last = elem;
1451               collate->cursor = elem;
1452
1453               /* Now add the weights.  They come from the `ellipsis_weights'
1454                  member of `collate'.  */
1455               elem->weights = (struct element_list_t *)
1456                 obstack_alloc (&collate->mempool,
1457                                nrules * sizeof (struct element_list_t));
1458               for (cnt = 0; cnt < nrules; ++cnt)
1459                 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1460                     && (collate->ellipsis_weight.weights[cnt].w[0]
1461                         == ELEMENT_ELLIPSIS2))
1462                   {
1463                     elem->weights[cnt].w = (struct element_t **)
1464                       obstack_alloc (&collate->mempool,
1465                                      sizeof (struct element_t *));
1466                     elem->weights[cnt].w[0] = elem;
1467                     elem->weights[cnt].cnt = 1;
1468                   }
1469                 else
1470                   {
1471                     /* Simly use the weight from `ellipsis_weight'.  */
1472                     elem->weights[cnt].w =
1473                       collate->ellipsis_weight.weights[cnt].w;
1474                     elem->weights[cnt].cnt =
1475                       collate->ellipsis_weight.weights[cnt].cnt;
1476                   }
1477             }
1478         }
1479     }
1480 }
1481
1482
1483 static void
1484 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1485                  struct localedef_t *copy_locale, int ignore_content)
1486 {
1487   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1488     {
1489       struct locale_collate_t *collate;
1490
1491       if (copy_locale == NULL)
1492         {
1493           collate = locale->categories[LC_COLLATE].collate =
1494             (struct locale_collate_t *)
1495             xcalloc (1, sizeof (struct locale_collate_t));
1496
1497           /* Init the various data structures.  */
1498           init_hash (&collate->elem_table, 100);
1499           init_hash (&collate->sym_table, 100);
1500           init_hash (&collate->seq_table, 500);
1501           obstack_init (&collate->mempool);
1502
1503           collate->col_weight_max = -1;
1504         }
1505       else
1506         /* Reuse the copy_locale's data structures.  */
1507         collate = locale->categories[LC_COLLATE].collate =
1508           copy_locale->categories[LC_COLLATE].collate;
1509     }
1510
1511   ldfile->translate_strings = 0;
1512   ldfile->return_widestr = 0;
1513 }
1514
1515
1516 void
1517 collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1518 {
1519   /* Now is the time when we can assign the individual collation
1520      values for all the symbols.  We have possibly different values
1521      for the wide- and the multibyte-character symbols.  This is done
1522      since it might make a difference in the encoding if there is in
1523      some cases no multibyte-character but there are wide-characters.
1524      (The other way around it is not important since theencoded
1525      collation value in the wide-character case is 32 bits wide and
1526      therefore requires no encoding).
1527
1528      The lowest collation value assigned is 2.  Zero is reserved for
1529      the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1530      functions and 1 is used to separate the individual passes for the
1531      different rules.
1532
1533      We also have to construct is list with all the bytes/words which
1534      can come first in a sequence, followed by all the elements which
1535      also start with this byte/word.  The order is reverse which has
1536      among others the important effect that longer strings are located
1537      first in the list.  This is required for the output data since
1538      the algorithm used in `strcoll' etc depends on this.
1539
1540      The multibyte case is easy.  We simply sort into an array with
1541      256 elements.  */
1542   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1543   int mbact[nrules];
1544   int wcact;
1545   int mbseqact;
1546   int wcseqact;
1547   struct element_t *runp;
1548   int i;
1549   int need_undefined = 0;
1550   struct section_list *sect;
1551   int ruleidx;
1552   int nr_wide_elems = 0;
1553
1554   if (collate == NULL)
1555     {
1556       /* No data, no check.  */
1557       if (! be_quiet)
1558         WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1559                                 "LC_COLLATE"));
1560       return;
1561     }
1562
1563   /* If this assertion is hit change the type in `element_t'.  */
1564   assert (nrules <= sizeof (runp->used_in_level) * 8);
1565
1566   /* Make sure that the `position' rule is used either in all sections
1567      or in none.  */
1568   for (i = 0; i < nrules; ++i)
1569     for (sect = collate->sections; sect != NULL; sect = sect->next)
1570       if (sect != collate->current_section
1571           && sect->rules != NULL
1572           && ((sect->rules[i] & sort_position)
1573               != (collate->current_section->rules[i] & sort_position)))
1574         {
1575           WITH_CUR_LOCALE (error (0, 0, _("\
1576 %s: `position' must be used for a specific level in all sections or none"),
1577                                   "LC_COLLATE"));
1578           break;
1579         }
1580
1581   /* Find out which elements are used at which level.  At the same
1582      time we find out whether we have any undefined symbols.  */
1583   runp = collate->start;
1584   while (runp != NULL)
1585     {
1586       if (runp->mbs != NULL)
1587         {
1588           for (i = 0; i < nrules; ++i)
1589             {
1590               int j;
1591
1592               for (j = 0; j < runp->weights[i].cnt; ++j)
1593                 /* A NULL pointer as the weight means IGNORE.  */
1594                 if (runp->weights[i].w[j] != NULL)
1595                   {
1596                     if (runp->weights[i].w[j]->weights == NULL)
1597                       {
1598                         WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1599                                                         runp->line,
1600                                                         _("symbol `%s' not defined"),
1601                                                         runp->weights[i].w[j]->name));
1602
1603                         need_undefined = 1;
1604                         runp->weights[i].w[j] = &collate->undefined;
1605                       }
1606                     else
1607                       /* Set the bit for the level.  */
1608                       runp->weights[i].w[j]->used_in_level |= 1 << i;
1609                   }
1610             }
1611         }
1612
1613       /* Up to the next entry.  */
1614       runp = runp->next;
1615     }
1616
1617   /* Walk through the list of defined sequences and assign weights.  Also
1618      create the data structure which will allow generating the single byte
1619      character based tables.
1620
1621      Since at each time only the weights for each of the rules are
1622      only compared to other weights for this rule it is possible to
1623      assign more compact weight values than simply counting all
1624      weights in sequence.  We can assign weights from 3, one for each
1625      rule individually and only for those elements, which are actually
1626      used for this rule.
1627
1628      Why is this important?  It is not for the wide char table.  But
1629      it is for the singlebyte output since here larger numbers have to
1630      be encoded to make it possible to emit the value as a byte
1631      string.  */
1632   for (i = 0; i < nrules; ++i)
1633     mbact[i] = 2;
1634   wcact = 2;
1635   mbseqact = 0;
1636   wcseqact = 0;
1637   runp = collate->start;
1638   while (runp != NULL)
1639     {
1640       /* Determine the order.  */
1641       if (runp->used_in_level != 0)
1642         {
1643           runp->mborder = (int *) obstack_alloc (&collate->mempool,
1644                                                  nrules * sizeof (int));
1645
1646           for (i = 0; i < nrules; ++i)
1647             if ((runp->used_in_level & (1 << i)) != 0)
1648               runp->mborder[i] = mbact[i]++;
1649             else
1650               runp->mborder[i] = 0;
1651         }
1652
1653       if (runp->mbs != NULL)
1654         {
1655           struct element_t **eptr;
1656           struct element_t *lastp = NULL;
1657
1658           /* Find the point where to insert in the list.  */
1659           eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1660           while (*eptr != NULL)
1661             {
1662               if ((*eptr)->nmbs < runp->nmbs)
1663                 break;
1664
1665               if ((*eptr)->nmbs == runp->nmbs)
1666                 {
1667                   int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1668
1669                   if (c == 0)
1670                     {
1671                       /* This should not happen.  It means that we have
1672                          to symbols with the same byte sequence.  It is
1673                          of course an error.  */
1674                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1675                                                       (*eptr)->line,
1676                                                       _("\
1677 symbol `%s' has the same encoding as"), (*eptr)->name);
1678                                        error_at_line (0, 0, runp->file,
1679                                                       runp->line,
1680                                                       _("symbol `%s'"),
1681                                                       runp->name));
1682                       goto dont_insert;
1683                     }
1684                   else if (c < 0)
1685                     /* Insert it here.  */
1686                     break;
1687                 }
1688
1689               /* To the next entry.  */
1690               lastp = *eptr;
1691               eptr = &(*eptr)->mbnext;
1692             }
1693
1694           /* Set the pointers.  */
1695           runp->mbnext = *eptr;
1696           runp->mblast = lastp;
1697           if (*eptr != NULL)
1698             (*eptr)->mblast = runp;
1699           *eptr = runp;
1700         dont_insert:
1701           ;
1702         }
1703
1704       if (runp->used_in_level)
1705         {
1706           runp->wcorder = wcact++;
1707
1708           /* We take the opportunity to count the elements which have
1709              wide characters.  */
1710           ++nr_wide_elems;
1711         }
1712
1713       if (runp->is_character)
1714         {
1715           if (runp->nmbs == 1)
1716             collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1717
1718           runp->wcseqorder = wcseqact++;
1719         }
1720       else if (runp->mbs != NULL && runp->weights != NULL)
1721         /* This is for collation elements.  */
1722         runp->wcseqorder = wcseqact++;
1723
1724       /* Up to the next entry.  */
1725       runp = runp->next;
1726     }
1727
1728   /* Find out whether any of the `mbheads' entries is unset.  In this
1729      case we use the UNDEFINED entry.  */
1730   for (i = 1; i < 256; ++i)
1731     if (collate->mbheads[i] == NULL)
1732       {
1733         need_undefined = 1;
1734         collate->mbheads[i] = &collate->undefined;
1735       }
1736
1737   /* Now to the wide character case.  */
1738   collate->wcheads.p = 6;
1739   collate->wcheads.q = 10;
1740   wchead_table_init (&collate->wcheads);
1741
1742   collate->wcseqorder.p = 6;
1743   collate->wcseqorder.q = 10;
1744   collseq_table_init (&collate->wcseqorder);
1745
1746   /* Start adding.  */
1747   runp = collate->start;
1748   while (runp != NULL)
1749     {
1750       if (runp->wcs != NULL)
1751         {
1752           struct element_t *e;
1753           struct element_t **eptr;
1754           struct element_t *lastp;
1755
1756           /* Insert the collation sequence value.  */
1757           if (runp->is_character)
1758             collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1759                                runp->wcseqorder);
1760
1761           /* Find the point where to insert in the list.  */
1762           e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1763           eptr = &e;
1764           lastp = NULL;
1765           while (*eptr != NULL)
1766             {
1767               if ((*eptr)->nwcs < runp->nwcs)
1768                 break;
1769
1770               if ((*eptr)->nwcs == runp->nwcs)
1771                 {
1772                   int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1773                                    (wchar_t *) runp->wcs, runp->nwcs);
1774
1775                   if (c == 0)
1776                     {
1777                       /* This should not happen.  It means that we have
1778                          two symbols with the same byte sequence.  It is
1779                          of course an error.  */
1780                       WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1781                                                       (*eptr)->line,
1782                                                       _("\
1783 symbol `%s' has the same encoding as"), (*eptr)->name);
1784                                        error_at_line (0, 0, runp->file,
1785                                                       runp->line,
1786                                                       _("symbol `%s'"),
1787                                                       runp->name));
1788                       goto dont_insertwc;
1789                     }
1790                   else if (c < 0)
1791                     /* Insert it here.  */
1792                     break;
1793                 }
1794
1795               /* To the next entry.  */
1796               lastp = *eptr;
1797               eptr = &(*eptr)->wcnext;
1798             }
1799
1800           /* Set the pointers.  */
1801           runp->wcnext = *eptr;
1802           runp->wclast = lastp;
1803           if (*eptr != NULL)
1804             (*eptr)->wclast = runp;
1805           *eptr = runp;
1806           if (eptr == &e)
1807             wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1808         dont_insertwc:
1809           ;
1810         }
1811
1812       /* Up to the next entry.  */
1813       runp = runp->next;
1814     }
1815
1816   collseq_table_finalize (&collate->wcseqorder);
1817
1818   /* Now determine whether the UNDEFINED entry is needed and if yes,
1819      whether it was defined.  */
1820   collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1821   if (collate->undefined.file == NULL)
1822     {
1823       if (need_undefined)
1824         {
1825           /* This seems not to be enforced by recent standards.  Don't
1826              emit an error, simply append UNDEFINED at the end.  */
1827           if (0)
1828             WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1829
1830           /* Add UNDEFINED at the end.  */
1831           collate->undefined.mborder =
1832             (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1833
1834           for (i = 0; i < nrules; ++i)
1835             collate->undefined.mborder[i] = mbact[i]++;
1836         }
1837
1838       /* In any case we will need the definition for the wide character
1839          case.  But we will not complain that it is missing since the
1840          specification strangely enough does not seem to account for
1841          this.  */
1842       collate->undefined.wcorder = wcact++;
1843     }
1844
1845   /* Finally, try to unify the rules for the sections.  Whenever the rules
1846      for a section are the same as those for another section give the
1847      ruleset the same index.  Since there are never many section we can
1848      use an O(n^2) algorithm here.  */
1849   sect = collate->sections;
1850   while (sect != NULL && sect->rules == NULL)
1851     sect = sect->next;
1852
1853   /* Bail out if we have no sections because of earlier errors.  */
1854   if (sect == NULL)
1855     {
1856       WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1857                               _("too many errors; giving up")));
1858       return;
1859     }
1860
1861   ruleidx = 0;
1862   do
1863     {
1864       struct section_list *osect = collate->sections;
1865
1866       while (osect != sect)
1867         if (osect->rules != NULL
1868             && memcmp (osect->rules, sect->rules,
1869                        nrules * sizeof (osect->rules[0])) == 0)
1870           break;
1871         else
1872           osect = osect->next;
1873
1874       if (osect == sect)
1875         sect->ruleidx = ruleidx++;
1876       else
1877         sect->ruleidx = osect->ruleidx;
1878
1879       /* Next section.  */
1880       do
1881         sect = sect->next;
1882       while (sect != NULL && sect->rules == NULL);
1883     }
1884   while (sect != NULL);
1885   /* We are currently not prepared for more than 128 rulesets.  But this
1886      should never really be a problem.  */
1887   assert (ruleidx <= 128);
1888 }
1889
1890
1891 static int32_t
1892 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1893                struct element_t *elem)
1894 {
1895   size_t cnt;
1896   int32_t retval;
1897
1898   /* Optimize the use of UNDEFINED.  */
1899   if (elem == &collate->undefined)
1900     /* The weights are already inserted.  */
1901     return 0;
1902
1903   /* This byte can start exactly one collation element and this is
1904      a single byte.  We can directly give the index to the weights.  */
1905   retval = obstack_object_size (pool);
1906
1907   /* Construct the weight.  */
1908   for (cnt = 0; cnt < nrules; ++cnt)
1909     {
1910       char buf[elem->weights[cnt].cnt * 7];
1911       int len = 0;
1912       int i;
1913
1914       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1915         /* Encode the weight value.  We do nothing for IGNORE entries.  */
1916         if (elem->weights[cnt].w[i] != NULL)
1917           len += utf8_encode (&buf[len],
1918                               elem->weights[cnt].w[i]->mborder[cnt]);
1919
1920       /* And add the buffer content.  */
1921       obstack_1grow (pool, len);
1922       obstack_grow (pool, buf, len);
1923     }
1924
1925   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1926 }
1927
1928
1929 static int32_t
1930 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1931                  struct element_t *elem)
1932 {
1933   size_t cnt;
1934   int32_t retval;
1935
1936   /* Optimize the use of UNDEFINED.  */
1937   if (elem == &collate->undefined)
1938     /* The weights are already inserted.  */
1939     return 0;
1940
1941   /* This byte can start exactly one collation element and this is
1942      a single byte.  We can directly give the index to the weights.  */
1943   retval = obstack_object_size (pool) / sizeof (int32_t);
1944
1945   /* Construct the weight.  */
1946   for (cnt = 0; cnt < nrules; ++cnt)
1947     {
1948       int32_t buf[elem->weights[cnt].cnt];
1949       int i;
1950       int32_t j;
1951
1952       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1953         if (elem->weights[cnt].w[i] != NULL)
1954           buf[j++] = elem->weights[cnt].w[i]->wcorder;
1955
1956       /* And add the buffer content.  */
1957       obstack_int32_grow (pool, j);
1958
1959       obstack_grow (pool, buf, j * sizeof (int32_t));
1960     }
1961
1962   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1963 }
1964
/* Context for add_to_tablewc, which is invoked as a callback through
   wchead_table_iterate and therefore receives only the character and
   the element.  collate_output fills these fields in before the
   iteration and clears the whole structure again afterwards.

   If localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to output_weightwc to receive the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the per-character sequence records.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of compressed runs.  */
  struct obstack *indpool;
  /* Collation data, passed through to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table which gets one entry added per character.  */
  struct collidx_table *tablewc;
} atwc;
1974
1975 static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1976
1977 static void
1978 add_to_tablewc (uint32_t ch, struct element_t *runp)
1979 {
1980   if (runp->wcnext == NULL && runp->nwcs == 1)
1981     {
1982       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1983                                            runp);
1984       collidx_table_add (atwc.tablewc, ch, weigthidx);
1985     }
1986   else
1987     {
1988       /* As for the singlebyte table, we recognize sequences and
1989          compress them.  */
1990       struct element_t *lastp;
1991
1992       collidx_table_add (atwc.tablewc, ch,
1993                          -(obstack_object_size (atwc.extrapool)
1994                          / sizeof (uint32_t)));
1995
1996       do
1997         {
1998           /* Store the current index in the weight table.  We know that
1999              the current position in the `extrapool' is aligned on a
2000              32-bit address.  */
2001           int32_t weightidx;
2002           int added;
2003
2004           /* Find out wether this is a single entry or we have more than
2005              one consecutive entry.  */
2006           if (runp->wcnext != NULL
2007               && runp->nwcs == runp->wcnext->nwcs
2008               && wmemcmp ((wchar_t *) runp->wcs,
2009                           (wchar_t *)runp->wcnext->wcs,
2010                           runp->nwcs - 1) == 0
2011               && (runp->wcs[runp->nwcs - 1]
2012                   == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2013             {
2014               int i;
2015               struct element_t *series_startp = runp;
2016               struct element_t *curp;
2017
2018               /* Now add first the initial byte sequence.  */
2019               added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2020               if (sizeof (int32_t) == sizeof (int))
2021                 obstack_make_room (atwc.extrapool, added);
2022
2023               /* More than one consecutive entry.  We mark this by having
2024                  a negative index into the indirect table.  */
2025               obstack_int32_grow_fast (atwc.extrapool,
2026                                        -(obstack_object_size (atwc.indpool)
2027                                          / sizeof (int32_t)));
2028               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2029
2030               do
2031                 runp = runp->wcnext;
2032               while (runp->wcnext != NULL
2033                      && runp->nwcs == runp->wcnext->nwcs
2034                      && wmemcmp ((wchar_t *) runp->wcs,
2035                                  (wchar_t *)runp->wcnext->wcs,
2036                                  runp->nwcs - 1) == 0
2037                      && (runp->wcs[runp->nwcs - 1]
2038                          == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2039
2040               /* Now walk backward from here to the beginning.  */
2041               curp = runp;
2042
2043               for (i = 1; i < runp->nwcs; ++i)
2044                 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2045
2046               /* Now find the end of the consecutive sequence and
2047                  add all the indeces in the indirect pool.  */
2048               do
2049                 {
2050                   weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2051                                                curp);
2052                   obstack_int32_grow (atwc.indpool, weightidx);
2053
2054                   curp = curp->wclast;
2055                 }
2056               while (curp != series_startp);
2057
2058               /* Add the final weight.  */
2059               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2060                                            curp);
2061               obstack_int32_grow (atwc.indpool, weightidx);
2062
2063               /* And add the end byte sequence.  Without length this
2064                  time.  */
2065               for (i = 1; i < curp->nwcs; ++i)
2066                 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2067             }
2068           else
2069             {
2070               /* A single entry.  Simply add the index and the length and
2071                  string (except for the first character which is already
2072                  tested for).  */
2073               int i;
2074
2075               /* Output the weight info.  */
2076               weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2077                                            runp);
2078
2079               added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2080               if (sizeof (int) == sizeof (int32_t))
2081                 obstack_make_room (atwc.extrapool, added);
2082
2083               obstack_int32_grow_fast (atwc.extrapool, weightidx);
2084               obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2085               for (i = 1; i < runp->nwcs; ++i)
2086                 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087             }
2088
2089           /* Next entry.  */
2090           lastp = runp;
2091           runp = runp->wcnext;
2092         }
2093       while (runp != NULL);
2094     }
2095 }
2096
2097 void
2098 collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2099                 const char *output_path)
2100 {
2101   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2102   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2103   struct iovec iov[2 + nelems];
2104   struct locale_file data;
2105   uint32_t idx[nelems];
2106   size_t cnt;
2107   size_t ch;
2108   int32_t tablemb[256];
2109   struct obstack weightpool;
2110   struct obstack extrapool;
2111   struct obstack indirectpool;
2112   struct section_list *sect;
2113   struct collidx_table tablewc;
2114   uint32_t elem_size;
2115   uint32_t *elem_table;
2116   int i;
2117   struct element_t *runp;
2118
2119   data.magic = LIMAGIC (LC_COLLATE);
2120   data.n = nelems;
2121   iov[0].iov_base = (void *) &data;
2122   iov[0].iov_len = sizeof (data);
2123
2124   iov[1].iov_base = (void *) idx;
2125   iov[1].iov_len = sizeof (idx);
2126
2127   idx[0] = iov[0].iov_len + iov[1].iov_len;
2128   cnt = 0;
2129
2130   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
2131   iov[2 + cnt].iov_base = &nrules;
2132   iov[2 + cnt].iov_len = sizeof (uint32_t);
2133   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2134   ++cnt;
2135
2136   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
2137   if (collate == NULL)
2138     {
2139       int32_t dummy = 0;
2140
2141       while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2142         {
2143           /* The words have to be handled specially.  */
2144           if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2145             {
2146               iov[2 + cnt].iov_base = &dummy;
2147               iov[2 + cnt].iov_len = sizeof (int32_t);
2148             }
2149           else
2150             {
2151               iov[2 + cnt].iov_base = NULL;
2152               iov[2 + cnt].iov_len = 0;
2153             }
2154
2155           if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
2156             idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2157           ++cnt;
2158         }
2159
2160       assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2161
2162       write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2163
2164       return;
2165     }
2166
2167   obstack_init (&weightpool);
2168   obstack_init (&extrapool);
2169   obstack_init (&indirectpool);
2170
2171   /* Since we are using the sign of an integer to mark indirection the
2172      offsets in the arrays we are indirectly referring to must not be
2173      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2174   obstack_int32_grow (&extrapool, 0);
2175   obstack_int32_grow (&indirectpool, 0);
2176
2177   /* Prepare the ruleset table.  */
2178   for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2179     if (sect->rules != NULL && sect->ruleidx == i)
2180       {
2181         int j;
2182
2183         obstack_make_room (&weightpool, nrules);
2184
2185         for (j = 0; j < nrules; ++j)
2186           obstack_1grow_fast (&weightpool, sect->rules[j]);
2187         ++i;
2188       }
2189   /* And align the output.  */
2190   i = (nrules * i) % __alignof__ (int32_t);
2191   if (i > 0)
2192     do
2193       obstack_1grow (&weightpool, '\0');
2194     while (++i < __alignof__ (int32_t));
2195
2196   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
2197   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2198   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2199   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2200   ++cnt;
2201
2202   /* Generate the 8-bit table.  Walk through the lists of sequences
2203      starting with the same byte and add them one after the other to
2204      the table.  In case we have more than one sequence starting with
2205      the same byte we have to use extra indirection.
2206
2207      First add a record for the NUL byte.  This entry will never be used
2208      so it does not matter.  */
2209   tablemb[0] = 0;
2210
2211   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2212      will probably be used more than once it is good to store the
2213      weights only once.  */
2214   if (collate->undefined.used_in_level != 0)
2215     output_weight (&weightpool, collate, &collate->undefined);
2216
2217   for (ch = 1; ch < 256; ++ch)
2218     if (collate->mbheads[ch]->mbnext == NULL
2219         && collate->mbheads[ch]->nmbs <= 1)
2220       {
2221         tablemb[ch] = output_weight (&weightpool, collate,
2222                                      collate->mbheads[ch]);
2223       }
2224     else
2225       {
2226         /* The entries in the list are sorted by length and then
2227            alphabetically.  This is the order in which we will add the
2228            elements to the collation table.  This allows simply walking
2229            the table in sequence and stopping at the first matching
2230            entry.  Since the longer sequences are coming first in the
2231            list they have the possibility to match first, just as it
2232            has to be.  In the worst case we are walking to the end of
2233            the list where we put, if no singlebyte sequence is defined
2234            in the locale definition, the weights for UNDEFINED.
2235
2236            To reduce the length of the search list we compress them a bit.
2237            This happens by collecting sequences of consecutive byte
2238            sequences in one entry (having and begin and end byte sequence)
2239            and add only one index into the weight table.  We can find the
2240            consecutive entries since they are also consecutive in the list.  */
2241         struct element_t *runp = collate->mbheads[ch];
2242         struct element_t *lastp;
2243
2244         assert ((obstack_object_size (&extrapool)
2245                  & (__alignof__ (int32_t) - 1)) == 0);
2246
2247         tablemb[ch] = -obstack_object_size (&extrapool);
2248
2249         do
2250           {
2251             /* Store the current index in the weight table.  We know that
2252                the current position in the `extrapool' is aligned on a
2253                32-bit address.  */
2254             int32_t weightidx;
2255             int added;
2256
2257             /* Find out wether this is a single entry or we have more than
2258                one consecutive entry.  */
2259             if (runp->mbnext != NULL
2260                 && runp->nmbs == runp->mbnext->nmbs
2261                 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2262                 && (runp->mbs[runp->nmbs - 1]
2263                     == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2264               {
2265                 int i;
2266                 struct element_t *series_startp = runp;
2267                 struct element_t *curp;
2268
2269                 /* Compute how much space we will need.  */
2270                 added = ((sizeof (int32_t) + 1 + 2 * (runp->nmbs - 1)
2271                           + __alignof__ (int32_t) - 1)
2272                          & ~(__alignof__ (int32_t) - 1));
2273                 assert ((obstack_object_size (&extrapool)
2274                          & (__alignof__ (int32_t) - 1)) == 0);
2275                 obstack_make_room (&extrapool, added);
2276
2277                 /* More than one consecutive entry.  We mark this by having
2278                    a negative index into the indirect table.  */
2279                 obstack_int32_grow_fast (&extrapool,
2280                                          -(obstack_object_size (&indirectpool)
2281                                            / sizeof (int32_t)));
2282
2283                 /* Now search first the end of the series.  */
2284                 do
2285                   runp = runp->mbnext;
2286                 while (runp->mbnext != NULL
2287                        && runp->nmbs == runp->mbnext->nmbs
2288                        && memcmp (runp->mbs, runp->mbnext->mbs,
2289                                   runp->nmbs - 1) == 0
2290                        && (runp->mbs[runp->nmbs - 1]
2291                            == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2292
2293                 /* Now walk backward from here to the beginning.  */
2294                 curp = runp;
2295
2296                 assert (runp->nmbs <= 256);
2297                 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2298                 for (i = 1; i < curp->nmbs; ++i)
2299                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2300
2301                 /* Now find the end of the consecutive sequence and
2302                    add all the indeces in the indirect pool.  */
2303                 do
2304                   {
2305                     weightidx = output_weight (&weightpool, collate, curp);
2306                     obstack_int32_grow (&indirectpool, weightidx);
2307
2308                     curp = curp->mblast;
2309                   }
2310                 while (curp != series_startp);
2311
2312                 /* Add the final weight.  */
2313                 weightidx = output_weight (&weightpool, collate, curp);
2314                 obstack_int32_grow (&indirectpool, weightidx);
2315
2316                 /* And add the end byte sequence.  Without length this
2317                    time.  */
2318                 for (i = 1; i < curp->nmbs; ++i)
2319                   obstack_1grow_fast (&extrapool, curp->mbs[i]);
2320               }
2321             else
2322               {
2323                 /* A single entry.  Simply add the index and the length and
2324                    string (except for the first character which is already
2325                    tested for).  */
2326                 int i;
2327
2328                 /* Output the weight info.  */
2329                 weightidx = output_weight (&weightpool, collate, runp);
2330
2331                 added = ((sizeof (int32_t) + 1 + runp->nmbs - 1
2332                           + __alignof__ (int32_t) - 1)
2333                          & ~(__alignof__ (int32_t) - 1));
2334                 assert ((obstack_object_size (&extrapool)
2335                          & (__alignof__ (int32_t) - 1)) == 0);
2336                 obstack_make_room (&extrapool, added);
2337
2338                 obstack_int32_grow_fast (&extrapool, weightidx);
2339                 assert (runp->nmbs <= 256);
2340                 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2341
2342                 for (i = 1; i < runp->nmbs; ++i)
2343                   obstack_1grow_fast (&extrapool, runp->mbs[i]);
2344               }
2345
2346             /* Add alignment bytes if necessary.  */
2347             while ((obstack_object_size (&extrapool)
2348                     & (__alignof__ (int32_t) - 1)) != 0)
2349               obstack_1grow_fast (&extrapool, '\0');
2350
2351             /* Next entry.  */
2352             lastp = runp;
2353             runp = runp->mbnext;
2354           }
2355         while (runp != NULL);
2356
2357         assert ((obstack_object_size (&extrapool)
2358                  & (__alignof__ (int32_t) - 1)) == 0);
2359
2360         /* If the final entry in the list is not a single character we
2361            add an UNDEFINED entry here.  */
2362         if (lastp->nmbs != 1)
2363           {
2364             int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2365                          & ~(__alignof__ (int32_t) - 1));
2366             obstack_make_room (&extrapool, added);
2367
2368             obstack_int32_grow_fast (&extrapool, 0);
2369             /* XXX What rule? We just pick the first.  */
2370             obstack_1grow_fast (&extrapool, 0);
2371             /* Length is zero.  */
2372             obstack_1grow_fast (&extrapool, 0);
2373
2374             /* Add alignment bytes if necessary.  */
2375             while ((obstack_object_size (&extrapool)
2376                     & (__alignof__ (int32_t) - 1)) != 0)
2377               obstack_1grow_fast (&extrapool, '\0');
2378           }
2379       }
2380
2381   /* Add padding to the tables if necessary.  */
2382   while ((obstack_object_size (&weightpool) & (__alignof__ (int32_t) - 1))
2383          != 0)
2384     obstack_1grow (&weightpool, 0);
2385
2386   /* Now add the four tables.  */
2387   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
2388   iov[2 + cnt].iov_base = tablemb;
2389   iov[2 + cnt].iov_len = sizeof (tablemb);
2390   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2391   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2392   ++cnt;
2393
2394   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
2395   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2396   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2397   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2398   ++cnt;
2399
2400   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
2401   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2402   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2403   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2404   ++cnt;
2405
2406   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB));
2407   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2408   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2409   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2410   assert ((iov[2 + cnt].iov_len & (__alignof__ (int32_t) - 1)) == 0);
2411   ++cnt;
2412
2413
2414   /* Now the same for the wide character table.  We need to store some
2415      more information here.  */
2416   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP1));
2417   iov[2 + cnt].iov_base = NULL;
2418   iov[2 + cnt].iov_len = 0;
2419   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2420   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2421   ++cnt;
2422
2423   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP2));
2424   iov[2 + cnt].iov_base = NULL;
2425   iov[2 + cnt].iov_len = 0;
2426   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2427   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2428   ++cnt;
2429
2430   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_GAP3));
2431   iov[2 + cnt].iov_base = NULL;
2432   iov[2 + cnt].iov_len = 0;
2433   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2434   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2435   ++cnt;
2436
2437   /* Since we are using the sign of an integer to mark indirection the
2438      offsets in the arrays we are indirectly referring to must not be
2439      zero since -0 == 0.  Therefore we add a bit of dummy content.  */
2440   obstack_int32_grow (&extrapool, 0);
2441   obstack_int32_grow (&indirectpool, 0);
2442
2443   /* Now insert the `UNDEFINED' value if it is used.  Since this value
2444      will probably be used more than once it is good to store the
2445      weights only once.  */
2446   if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2447     abort ();
2448
2449   /* Generate the table.  Walk through the lists of sequences starting
2450      with the same wide character and add them one after the other to
2451      the table.  In case we have more than one sequence starting with
2452      the same byte we have to use extra indirection.  */
2453   tablewc.p = 6;
2454   tablewc.q = 10;
2455   collidx_table_init (&tablewc);
2456
2457   atwc.weightpool = &weightpool;
2458   atwc.extrapool = &extrapool;
2459   atwc.indpool = &indirectpool;
2460   atwc.collate = collate;
2461   atwc.tablewc = &tablewc;
2462
2463   wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2464
2465   memset (&atwc, 0, sizeof (atwc));
2466
2467   collidx_table_finalize (&tablewc);
2468
2469   /* Now add the four tables.  */
2470   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC));
2471   iov[2 + cnt].iov_base = tablewc.result;
2472   iov[2 + cnt].iov_len = tablewc.result_size;
2473   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2474   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2475   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2476   ++cnt;
2477
2478   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC));
2479   iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
2480   iov[2 + cnt].iov_base = obstack_finish (&weightpool);
2481   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2482   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2483   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2484   ++cnt;
2485
2486   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC));
2487   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2488   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2489   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2490   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2491   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2492   ++cnt;
2493
2494   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC));
2495   iov[2 + cnt].iov_len = obstack_object_size (&indirectpool);
2496   iov[2 + cnt].iov_base = obstack_finish (&indirectpool);
2497   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2498   assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0);
2499   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2500   ++cnt;
2501
2502
2503   /* Finally write the table with collation element names out.  It is
2504      a hash table with a simple function which gets the name of the
2505      character as the input.  One character might have many names.  The
2506      value associated with the name is an index into the weight table
2507      where we are then interested in the first-level weight value.
2508
2509      To determine how large the table should be we are counting the
2510      elements we have to put in.  Since we are using internal chaining
2511      using a secondary hash function we have to make the table a bit
2512      larger to avoid extremely long search times.  We can achieve
2513      good results with a 40% larger table than there are entries.  */
2514   elem_size = 0;
2515   runp = collate->start;
2516   while (runp != NULL)
2517     {
2518       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2519         /* Yep, the element really counts.  */
2520         ++elem_size;
2521
2522       runp = runp->next;
2523     }
2524   /* Add 40% and find the next prime number.  */
2525   elem_size = next_prime (elem_size * 1.4);
2526
2527   /* Allocate the table.  Each entry consists of two words: the hash
2528      value and an index in a secondary table which provides the index
2529      into the weight table and the string itself (so that a match can
2530      be determined).  */
2531   elem_table = (uint32_t *) obstack_alloc (&extrapool,
2532                                            elem_size * 2 * sizeof (uint32_t));
2533   memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2534
2535   /* Now add the elements.  */
2536   runp = collate->start;
2537   while (runp != NULL)
2538     {
2539       if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2540         {
2541           /* Compute the hash value of the name.  */
2542           uint32_t namelen = strlen (runp->name);
2543           uint32_t hash = elem_hash (runp->name, namelen);
2544           size_t idx = hash % elem_size;
2545 #ifndef NDEBUG
2546           size_t start_idx = idx;
2547 #endif
2548
2549           if (elem_table[idx * 2] != 0)
2550             {
2551               /* The spot is already taken.  Try iterating using the value
2552                  from the secondary hashing function.  */
2553               size_t iter = hash % (elem_size - 2) + 1;
2554
2555               do
2556                 {
2557                   idx += iter;
2558                   if (idx >= elem_size)
2559                     idx -= elem_size;
2560                   assert (idx != start_idx);
2561                 }
2562               while (elem_table[idx * 2] != 0);
2563             }
2564           /* This is the spot where we will insert the value.  */
2565           elem_table[idx * 2] = hash;
2566           elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2567
2568           /* Then the string itself including length.  */
2569           obstack_1grow (&extrapool, namelen);
2570           obstack_grow (&extrapool, runp->name, namelen);
2571
2572           /* And the multibyte representation.  */
2573           obstack_1grow (&extrapool, runp->nmbs);
2574           obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2575
2576           /* And align again to 32 bits.  */
2577           if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2578             obstack_grow (&extrapool, "\0\0",
2579                           (sizeof (int32_t)
2580                            - ((1 + namelen + 1 + runp->nmbs)
2581                               % sizeof (int32_t))));
2582
2583           /* Now some 32-bit values: multibyte collation sequence,
2584              wide char string (including length), and wide char
2585              collation sequence.  */
2586           obstack_int32_grow (&extrapool, runp->mbseqorder);
2587
2588           obstack_int32_grow (&extrapool, runp->nwcs);
2589           obstack_grow (&extrapool, runp->wcs,
2590                         runp->nwcs * sizeof (uint32_t));
2591
2592           obstack_int32_grow (&extrapool, runp->wcseqorder);
2593         }
2594
2595       runp = runp->next;
2596     }
2597
2598   /* Prepare to write out this data.  */
2599   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
2600   iov[2 + cnt].iov_base = &elem_size;
2601   iov[2 + cnt].iov_len = sizeof (int32_t);
2602   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2603   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2604   ++cnt;
2605
2606   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
2607   iov[2 + cnt].iov_base = elem_table;
2608   iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
2609   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2610   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2611   ++cnt;
2612
2613   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
2614   iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
2615   iov[2 + cnt].iov_base = obstack_finish (&extrapool);
2616   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2617   ++cnt;
2618
2619   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB));
2620   iov[2 + cnt].iov_base = collate->mbseqorder;
2621   iov[2 + cnt].iov_len = 256;
2622   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2623   ++cnt;
2624
2625   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC));
2626   iov[2 + cnt].iov_base = collate->wcseqorder.result;
2627   iov[2 + cnt].iov_len = collate->wcseqorder.result_size;
2628   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
2629   assert (idx[cnt] % __alignof__ (int32_t) == 0);
2630   ++cnt;
2631
2632   assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_CODESET));
2633   iov[2 + cnt].iov_base = (void *) charmap->code_set_name;
2634   iov[2 + cnt].iov_len = strlen (iov[2 + cnt].iov_base) + 1;
2635   ++cnt;
2636
2637   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
2638
2639   write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", 2 + cnt, iov);
2640
2641   obstack_free (&weightpool, NULL);
2642   obstack_free (&extrapool, NULL);
2643   obstack_free (&indirectpool, NULL);
2644 }
2645
2646
2647 static enum token_t
2648 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2649          const struct charmap_t *charmap, int to_endif)
2650 {
2651   while (1)
2652     {
2653       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2654       enum token_t nowtok = now->tok;
2655
2656       if (nowtok == tok_eof || nowtok == tok_end)
2657         return nowtok;
2658
2659       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2660         {
2661           lr_error (ldfile, _("%s: nested conditionals not supported"),
2662                     "LC_COLLATE");
2663           nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2664           if (nowtok == tok_eof || nowtok == tok_end)
2665             return nowtok;
2666         }
2667       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2668         {
2669           lr_ignore_rest (ldfile, 1);
2670           return nowtok;
2671         }
2672       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2673         {
2674           /* Do not read the rest of the line.  */
2675           return nowtok;
2676         }
2677       else if (nowtok == tok_else)
2678         {
2679           lr_error (ldfile, _("%s: more then one 'else'"), "LC_COLLATE");
2680         }
2681
2682       lr_ignore_rest (ldfile, 0);
2683     }
2684 }
2685
2686
2687 void
2688 collate_read (struct linereader *ldfile, struct localedef_t *result,
2689               const struct charmap_t *charmap, const char *repertoire_name,
2690               int ignore_content)
2691 {
2692   struct repertoire_t *repertoire = NULL;
2693   struct locale_collate_t *collate;
2694   struct token *now;
2695   struct token *arg = NULL;
2696   enum token_t nowtok;
2697   enum token_t was_ellipsis = tok_none;
2698   struct localedef_t *copy_locale = NULL;
2699   /* Parsing state:
2700      0 - start
2701      1 - between `order-start' and `order-end'
2702      2 - after `order-end'
2703      3 - after `reorder-after', waiting for `reorder-end'
2704      4 - after `reorder-end'
2705      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2706      6 - after `reorder-sections-end'
2707   */
2708   int state = 0;
2709
2710   /* Get the repertoire we have to use.  */
2711   if (repertoire_name != NULL)
2712     repertoire = repertoire_read (repertoire_name);
2713
2714   /* The rest of the line containing `LC_COLLATE' must be free.  */
2715   lr_ignore_rest (ldfile, 1);
2716
2717   while (1)
2718     {
2719       do
2720         {
2721           now = lr_token (ldfile, charmap, result, NULL, verbose);
2722           nowtok = now->tok;
2723         }
2724       while (nowtok == tok_eol);
2725
2726       if (nowtok != tok_define)
2727         break;
2728
2729       if (ignore_content)
2730         lr_ignore_rest (ldfile, 0);
2731       else
2732         {
2733           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2734           if (arg->tok != tok_ident)
2735             SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2736           else
2737             {
2738               /* Simply add the new symbol.  */
2739               struct name_list *newsym = xmalloc (sizeof (*newsym)
2740                                                   + arg->val.str.lenmb + 1);
2741               memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2742               newsym->str[arg->val.str.lenmb] = '\0';
2743               newsym->next = defined;
2744               defined = newsym;
2745
2746               lr_ignore_rest (ldfile, 1);
2747             }
2748         }
2749     }
2750
2751   if (nowtok == tok_copy)
2752     {
2753       now = lr_token (ldfile, charmap, result, NULL, verbose);
2754       if (now->tok != tok_string)
2755         {
2756           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2757
2758         skip_category:
2759           do
2760             now = lr_token (ldfile, charmap, result, NULL, verbose);
2761           while (now->tok != tok_eof && now->tok != tok_end);
2762
2763           if (now->tok != tok_eof
2764               || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2765                   now->tok == tok_eof))
2766             lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2767           else if (now->tok != tok_lc_collate)
2768             {
2769               lr_error (ldfile, _("\
2770 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2771               lr_ignore_rest (ldfile, 0);
2772             }
2773           else
2774             lr_ignore_rest (ldfile, 1);
2775
2776           return;
2777         }
2778
2779       if (! ignore_content)
2780         {
2781           /* Get the locale definition.  */
2782           copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2783                                      repertoire_name, charmap, NULL);
2784           if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2785             {
2786               /* Not yet loaded.  So do it now.  */
2787               if (locfile_read (copy_locale, charmap) != 0)
2788                 goto skip_category;
2789             }
2790
2791           if (copy_locale->categories[LC_COLLATE].collate == NULL)
2792             return;
2793         }
2794
2795       lr_ignore_rest (ldfile, 1);
2796
2797       now = lr_token (ldfile, charmap, result, NULL, verbose);
2798       nowtok = now->tok;
2799     }
2800
2801   /* Prepare the data structures.  */
2802   collate_startup (ldfile, result, copy_locale, ignore_content);
2803   collate = result->categories[LC_COLLATE].collate;
2804
2805   while (1)
2806     {
2807       char ucs4buf[10];
2808       char *symstr;
2809       size_t symlen;
2810
2811       /* Of course we don't proceed beyond the end of file.  */
2812       if (nowtok == tok_eof)
2813         break;
2814
2815       /* Ignore empty lines.  */
2816       if (nowtok == tok_eol)
2817         {
2818           now = lr_token (ldfile, charmap, result, NULL, verbose);
2819           nowtok = now->tok;
2820           continue;
2821         }
2822
2823       switch (nowtok)
2824         {
2825         case tok_copy:
2826           /* Allow copying other locales.  */
2827           now = lr_token (ldfile, charmap, result, NULL, verbose);
2828           if (now->tok != tok_string)
2829             goto err_label;
2830
2831           if (! ignore_content)
2832             load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2833                          charmap, result);
2834
2835           lr_ignore_rest (ldfile, 1);
2836           break;
2837
2838         case tok_coll_weight_max:
2839           /* Ignore the rest of the line if we don't need the input of
2840              this line.  */
2841           if (ignore_content)
2842             {
2843               lr_ignore_rest (ldfile, 0);
2844               break;
2845             }
2846
2847           if (state != 0)
2848             goto err_label;
2849
2850           arg = lr_token (ldfile, charmap, result, NULL, verbose);
2851           if (arg->tok != tok_number)
2852             goto err_label;
2853           if (collate->col_weight_max != -1)
2854             lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2855                       "LC_COLLATE", "col_weight_max");
2856           else
2857             collate->col_weight_max = arg->val.num;
2858           lr_ignore_rest (ldfile, 1);
2859           break;
2860
2861         case tok_section_symbol:
2862           /* Ignore the rest of the line if we don't need the input of
2863              this line.  */
2864           if (ignore_content)
2865             {
2866               lr_ignore_rest (ldfile, 0);
2867               break;
2868             }
2869
2870           if (state != 0)
2871             goto err_label;
2872
2873           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2874           if (arg->tok != tok_bsymbol)
2875             goto err_label;
2876           else if (!ignore_content)
2877             {
2878               /* Check whether this section is already known.  */
2879               struct section_list *known = collate->sections;
2880               while (known != NULL)
2881                 {
2882                   if (strcmp (known->name, arg->val.str.startmb) == 0)
2883                     break;
2884                   known = known->next;
2885                 }
2886
2887               if (known != NULL)
2888                 {
2889                   lr_error (ldfile,
2890                             _("%s: duplicate declaration of section `%s'"),
2891                             "LC_COLLATE", arg->val.str.startmb);
2892                   free (arg->val.str.startmb);
2893                 }
2894               else
2895                 collate->sections = make_seclist_elem (collate,
2896                                                        arg->val.str.startmb,
2897                                                        collate->sections);
2898
2899               lr_ignore_rest (ldfile, known == NULL);
2900             }
2901           else
2902             {
2903               free (arg->val.str.startmb);
2904               lr_ignore_rest (ldfile, 0);
2905             }
2906           break;
2907
2908         case tok_collating_element:
2909           /* Ignore the rest of the line if we don't need the input of
2910              this line.  */
2911           if (ignore_content)
2912             {
2913               lr_ignore_rest (ldfile, 0);
2914               break;
2915             }
2916
2917           if (state != 0 && state != 2)
2918             goto err_label;
2919
2920           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2921           if (arg->tok != tok_bsymbol)
2922             goto err_label;
2923           else
2924             {
2925               const char *symbol = arg->val.str.startmb;
2926               size_t symbol_len = arg->val.str.lenmb;
2927
2928               /* Next the `from' keyword.  */
2929               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2930               if (arg->tok != tok_from)
2931                 {
2932                   free ((char *) symbol);
2933                   goto err_label;
2934                 }
2935
2936               ldfile->return_widestr = 1;
2937               ldfile->translate_strings = 1;
2938
2939               /* Finally the string with the replacement.  */
2940               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2941
2942               ldfile->return_widestr = 0;
2943               ldfile->translate_strings = 0;
2944
2945               if (arg->tok != tok_string)
2946                 goto err_label;
2947
2948               if (!ignore_content && symbol != NULL)
2949                 {
2950                   /* The name is already defined.  */
2951                   if (check_duplicate (ldfile, collate, charmap,
2952                                        repertoire, symbol, symbol_len))
2953                     goto col_elem_free;
2954
2955                   if (arg->val.str.startmb != NULL)
2956                     insert_entry (&collate->elem_table, symbol, symbol_len,
2957                                   new_element (collate,
2958                                                arg->val.str.startmb,
2959                                                arg->val.str.lenmb - 1,
2960                                                arg->val.str.startwc,
2961                                                symbol, symbol_len, 0));
2962                 }
2963               else
2964                 {
2965                 col_elem_free:
2966                   free ((char *) symbol);
2967                   free (arg->val.str.startmb);
2968                   free (arg->val.str.startwc);
2969                 }
2970               lr_ignore_rest (ldfile, 1);
2971             }
2972           break;
2973
2974         case tok_collating_symbol:
2975           /* Ignore the rest of the line if we don't need the input of
2976              this line.  */
2977           if (ignore_content)
2978             {
2979               lr_ignore_rest (ldfile, 0);
2980               break;
2981             }
2982
2983           if (state != 0 && state != 2)
2984             goto err_label;
2985
2986           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2987           if (arg->tok != tok_bsymbol)
2988             goto err_label;
2989           else
2990             {
2991               char *symbol = arg->val.str.startmb;
2992               size_t symbol_len = arg->val.str.lenmb;
2993               char *endsymbol = NULL;
2994               size_t endsymbol_len = 0;
2995               enum token_t ellipsis = tok_none;
2996
2997               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2998               if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2999                 {
3000                   ellipsis = arg->tok;
3001
3002                   arg = lr_token (ldfile, charmap, result, repertoire,
3003                                   verbose);
3004                   if (arg->tok != tok_bsymbol)
3005                     {
3006                       free (symbol);
3007                       goto err_label;
3008                     }
3009
3010                   endsymbol = arg->val.str.startmb;
3011                   endsymbol_len = arg->val.str.lenmb;
3012
3013                   lr_ignore_rest (ldfile, 1);
3014                 }
3015               else if (arg->tok != tok_eol)
3016                 {
3017                   free (symbol);
3018                   goto err_label;
3019                 }
3020
3021               if (!ignore_content)
3022                 {
3023                   if (symbol == NULL
3024                       || (ellipsis != tok_none && endsymbol == NULL))
3025                     {
3026                       lr_error (ldfile, _("\
3027 %s: unknown character in collating symbol name"),
3028                                 "LC_COLLATE");
3029                       goto col_sym_free;
3030                     }
3031                   else if (ellipsis == tok_none)
3032                     {
3033                       /* A single symbol, no ellipsis.  */
3034                       if (check_duplicate (ldfile, collate, charmap,
3035                                            repertoire, symbol, symbol_len))
3036                         /* The name is already defined.  */
3037                         goto col_sym_free;
3038
3039                       insert_entry (&collate->sym_table, symbol, symbol_len,
3040                                     new_symbol (collate, symbol, symbol_len));
3041                     }
3042                   else if (symbol_len != endsymbol_len)
3043                     {
3044                     col_sym_inv_range:
3045                       lr_error (ldfile,
3046                                 _("invalid names for character range"));
3047                       goto col_sym_free;
3048                     }
3049                   else
3050                     {
3051                       /* Oh my, we have to handle an ellipsis.  First, as
3052                          usual, determine the common prefix and then
3053                          convert the rest into a range.  */
3054                       size_t prefixlen;
3055                       unsigned long int from;
3056                       unsigned long int to;
3057                       char *endp;
3058
3059                       for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
3060                         if (symbol[prefixlen] != endsymbol[prefixlen])
3061                           break;
3062
3063                       /* Convert the rest into numbers.  */
3064                       symbol[symbol_len] = '\0';
3065                       from = strtoul (&symbol[prefixlen], &endp,
3066                                       ellipsis == tok_ellipsis2 ? 16 : 10);
3067                       if (*endp != '\0')
3068                         goto col_sym_inv_range;
3069
3070                       endsymbol[symbol_len] = '\0';
3071                       to = strtoul (&endsymbol[prefixlen], &endp,
3072                                     ellipsis == tok_ellipsis2 ? 16 : 10);
3073                       if (*endp != '\0')
3074                         goto col_sym_inv_range;
3075
3076                       if (from > to)
3077                         goto col_sym_inv_range;
3078
3079                       /* Now loop over all entries.  */
3080                       while (from <= to)
3081                         {
3082                           char *symbuf;
3083
3084                           symbuf = (char *) obstack_alloc (&collate->mempool,
3085                                                            symbol_len + 1);
3086
3087                           /* Create the name.  */
3088                           sprintf (symbuf,
3089                                    ellipsis == tok_ellipsis2
3090                                    ? "%.*s%.*lX" : "%.*s%.*lu",
3091                                    (int) prefixlen, symbol,
3092                                    (int) (symbol_len - prefixlen), from);
3093
3094                           if (check_duplicate (ldfile, collate, charmap,
3095                                                repertoire, symbuf, symbol_len))
3096                             /* The name is already defined.  */
3097                             goto col_sym_free;
3098
3099                           insert_entry (&collate->sym_table, symbuf,
3100                                         symbol_len,
3101                                         new_symbol (collate, symbuf,
3102                                                     symbol_len));
3103
3104                           /* Increment the counter.  */
3105                           ++from;
3106                         }
3107
3108                       goto col_sym_free;
3109                     }
3110                 }
3111               else
3112                 {
3113                 col_sym_free:
3114                   free (symbol);
3115                   free (endsymbol);
3116                 }
3117             }
3118           break;
3119
3120         case tok_symbol_equivalence:
3121           /* Ignore the rest of the line if we don't need the input of
3122              this line.  */
3123           if (ignore_content)
3124             {
3125               lr_ignore_rest (ldfile, 0);
3126               break;
3127             }
3128
3129           if (state != 0)
3130             goto err_label;
3131
3132           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3133           if (arg->tok != tok_bsymbol)
3134             goto err_label;
3135           else
3136             {
3137               const char *newname = arg->val.str.startmb;
3138               size_t newname_len = arg->val.str.lenmb;
3139               const char *symname;
3140               size_t symname_len;
3141               void *symval;     /* Actually struct symbol_t*  */
3142
3143               arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3144               if (arg->tok != tok_bsymbol)
3145                 {
3146                   free ((char *) newname);
3147                   goto err_label;
3148                 }
3149
3150               symname = arg->val.str.startmb;
3151               symname_len = arg->val.str.lenmb;
3152
3153               if (newname == NULL)
3154                 {
3155                   lr_error (ldfile, _("\
3156 %s: unknown character in equivalent definition name"),
3157                             "LC_COLLATE");
3158
3159                 sym_equiv_free:
3160                   free ((char *) newname);
3161                   free ((char *) symname);
3162                   break;
3163                 }
3164               if (symname == NULL)
3165                 {
3166                   lr_error (ldfile, _("\
3167 %s: unknown character in equivalent definition value"),
3168                             "LC_COLLATE");
3169                   goto sym_equiv_free;
3170                 }
3171
3172               /* See whether the symbol name is already defined.  */
3173               if (find_entry (&collate->sym_table, symname, symname_len,
3174                               &symval) != 0)
3175                 {
3176                   lr_error (ldfile, _("\
3177 %s: unknown symbol `%s' in equivalent definition"),
3178                             "LC_COLLATE", symname);
3179                   goto sym_equiv_free;
3180                 }
3181
3182               if (insert_entry (&collate->sym_table,
3183                                 newname, newname_len, symval) < 0)
3184                 {
3185                   lr_error (ldfile, _("\
3186 error while adding equivalent collating symbol"));
3187                   goto sym_equiv_free;
3188                 }
3189
3190               free ((char *) symname);
3191             }
3192           lr_ignore_rest (ldfile, 1);
3193           break;
3194
3195         case tok_script:
3196           /* Ignore the rest of the line if we don't need the input of
3197              this line.  */
3198           if (ignore_content)
3199             {
3200               lr_ignore_rest (ldfile, 0);
3201               break;
3202             }
3203
3204           /* We get told about the scripts we know.  */
3205           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3206           if (arg->tok != tok_bsymbol)
3207             goto err_label;
3208           else
3209             {
3210               struct section_list *runp = collate->known_sections;
3211               char *name;
3212
3213               while (runp != NULL)
3214                 if (strncmp (runp->name, arg->val.str.startmb,
3215                              arg->val.str.lenmb) == 0
3216                     && runp->name[arg->val.str.lenmb] == '\0')
3217                   break;
3218                 else
3219                   runp = runp->def_next;
3220
3221               if (runp != NULL)
3222                 {
3223                   lr_error (ldfile, _("duplicate definition of script `%s'"),
3224                             runp->name);
3225                   lr_ignore_rest (ldfile, 0);
3226                   break;
3227                 }
3228
3229               runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3230               name = (char *) xmalloc (arg->val.str.lenmb + 1);
3231               memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3232               name[arg->val.str.lenmb] = '\0';
3233               runp->name = name;
3234
3235               runp->def_next = collate->known_sections;
3236               collate->known_sections = runp;
3237             }
3238           lr_ignore_rest (ldfile, 1);
3239           break;
3240
3241         case tok_order_start:
3242           /* Ignore the rest of the line if we don't need the input of
3243              this line.  */
3244           if (ignore_content)
3245             {
3246               lr_ignore_rest (ldfile, 0);
3247               break;
3248             }
3249
3250           if (state != 0 && state != 1 && state != 2)
3251             goto err_label;
3252           state = 1;
3253
3254           /* The 14652 draft does not specify whether all `order_start' lines
3255              must contain the same number of sort-rules, but 14651 does.  So
3256              we require this here as well.  */
3257           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3258           if (arg->tok == tok_bsymbol)
3259             {
3260               /* This better should be a section name.  */
3261               struct section_list *sp = collate->known_sections;
3262               while (sp != NULL
3263                      && (sp->name == NULL
3264                          || strncmp (sp->name, arg->val.str.startmb,
3265                                      arg->val.str.lenmb) != 0
3266                          || sp->name[arg->val.str.lenmb] != '\0'))
3267                 sp = sp->def_next;
3268
3269               if (sp == NULL)
3270                 {
3271                   lr_error (ldfile, _("\
3272 %s: unknown section name `%.*s'"),
3273                             "LC_COLLATE", (int) arg->val.str.lenmb,
3274                             arg->val.str.startmb);
3275                   /* We use the error section.  */
3276                   collate->current_section = &collate->error_section;
3277
3278                   if (collate->error_section.first == NULL)
3279                     {
3280                       /* Insert &collate->error_section at the end of
3281                          the collate->sections list.  */
3282                       if (collate->sections == NULL)
3283                         collate->sections = &collate->error_section;
3284                       else
3285                         {
3286                           sp = collate->sections;
3287                           while (sp->next != NULL)
3288                             sp = sp->next;
3289
3290                           sp->next = &collate->error_section;
3291                         }
3292                       collate->error_section.next = NULL;
3293                     }
3294                 }
3295               else
3296                 {
3297                   /* One should not be allowed to open the same
3298                      section twice.  */
3299                   if (sp->first != NULL)
3300                     lr_error (ldfile, _("\
3301 %s: multiple order definitions for section `%s'"),
3302                               "LC_COLLATE", sp->name);
3303                   else
3304                     {
3305                       /* Insert sp in the collate->sections list,
3306                          right after collate->current_section.  */
3307                       if (collate->current_section != NULL)
3308                         {
3309                           sp->next = collate->current_section->next;
3310                           collate->current_section->next = sp;
3311                         }
3312                       else if (collate->sections == NULL)
3313                         /* This is the first section to be defined.  */
3314                         collate->sections = sp;
3315
3316                       collate->current_section = sp;
3317                     }
3318
3319                   /* Next should come the end of the line or a semicolon.  */
3320                   arg = lr_token (ldfile, charmap, result, repertoire,
3321                                   verbose);
3322                   if (arg->tok == tok_eol)
3323                     {
3324                       uint32_t cnt;
3325
3326                       /* This means we have exactly one rule: `forward'.  */
3327                       if (nrules > 1)
3328                         lr_error (ldfile, _("\
3329 %s: invalid number of sorting rules"),
3330                                   "LC_COLLATE");
3331                       else
3332                         nrules = 1;
3333                       sp->rules = obstack_alloc (&collate->mempool,
3334                                                  (sizeof (enum coll_sort_rule)
3335                                                   * nrules));
3336                       for (cnt = 0; cnt < nrules; ++cnt)
3337                         sp->rules[cnt] = sort_forward;
3338
3339                       /* Next line.  */
3340                       break;
3341                     }
3342
3343                   /* Get the next token.  */
3344                   arg = lr_token (ldfile, charmap, result, repertoire,
3345                                   verbose);
3346                 }
3347             }
3348           else
3349             {
3350               /* There is no section symbol.  Therefore we use the unnamed
3351                  section.  */
3352               collate->current_section = &collate->unnamed_section;
3353
3354               if (collate->unnamed_section_defined)
3355                 lr_error (ldfile, _("\
3356 %s: multiple order definitions for unnamed section"),
3357                           "LC_COLLATE");
3358               else
3359                 {
3360                   /* Insert &collate->unnamed_section at the beginning of
3361                      the collate->sections list.  */
3362                   collate->unnamed_section.next = collate->sections;
3363                   collate->sections = &collate->unnamed_section;
3364                   collate->unnamed_section_defined = true;
3365                 }
3366             }
3367
3368           /* Now read the direction names.  */
3369           read_directions (ldfile, arg, charmap, repertoire, result);
3370
3371           /* From now we need the strings untranslated.  */
3372           ldfile->translate_strings = 0;
3373           break;
3374
3375         case tok_order_end:
3376           /* Ignore the rest of the line if we don't need the input of
3377              this line.  */
3378           if (ignore_content)
3379             {
3380               lr_ignore_rest (ldfile, 0);
3381               break;
3382             }
3383
3384           if (state != 1)
3385             goto err_label;
3386
3387           /* Handle ellipsis at end of list.  */
3388           if (was_ellipsis != tok_none)
3389             {
3390               handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3391                                repertoire, result);
3392               was_ellipsis = tok_none;
3393             }
3394
3395           state = 2;
3396           lr_ignore_rest (ldfile, 1);
3397           break;
3398
3399         case tok_reorder_after:
3400           /* Ignore the rest of the line if we don't need the input of
3401              this line.  */
3402           if (ignore_content)
3403             {
3404               lr_ignore_rest (ldfile, 0);
3405               break;
3406             }
3407
3408           if (state == 1)
3409             {
3410               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3411                         "LC_COLLATE");
3412               state = 2;
3413
3414               /* Handle ellipsis at end of list.  */
3415               if (was_ellipsis != tok_none)
3416                 {
3417                   handle_ellipsis (ldfile, arg->val.str.startmb,
3418                                    arg->val.str.lenmb, was_ellipsis, charmap,
3419                                    repertoire, result);
3420                   was_ellipsis = tok_none;
3421                 }
3422             }
3423           else if (state == 0 && copy_locale == NULL)
3424             goto err_label;
3425           else if (state != 0 && state != 2 && state != 3)
3426             goto err_label;
3427           state = 3;
3428
3429           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3430           if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3431             {
3432               /* Find this symbol in the sequence table.  */
3433               char ucsbuf[10];
3434               char *startmb;
3435               size_t lenmb;
3436               struct element_t *insp;
3437               int no_error = 1;
3438               void *ptr;
3439
3440               if (arg->tok == tok_bsymbol)
3441                 {
3442                   startmb = arg->val.str.startmb;
3443                   lenmb = arg->val.str.lenmb;
3444                 }
3445               else
3446                 {
3447                   sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3448                   startmb = ucsbuf;
3449                   lenmb = 9;
3450                 }
3451
3452               if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3453                 /* Yes, the symbol exists.  Simply point the cursor
3454                    to it.  */
3455                 collate->cursor = (struct element_t *) ptr;
3456               else
3457                 {
3458                   struct symbol_t *symbp;
3459                   void *ptr;
3460
3461                   if (find_entry (&collate->sym_table, startmb, lenmb,
3462                                   &ptr) == 0)
3463                     {
3464                       symbp = ptr;
3465
3466                       if (symbp->order->last != NULL
3467                           || symbp->order->next != NULL)
3468                         collate->cursor = symbp->order;
3469                       else
3470                         {
3471                           /* This is a collating symbol but its position
3472                              is not yet defined.  */
3473                           lr_error (ldfile, _("\
3474 %s: order for collating symbol %.*s not yet defined"),
3475                                     "LC_COLLATE", (int) lenmb, startmb);
3476                           collate->cursor = NULL;
3477                           no_error = 0;
3478                         }
3479                     }
3480                   else if (find_entry (&collate->elem_table, startmb, lenmb,
3481                                        &ptr) == 0)
3482                     {
3483                       insp = (struct element_t *) ptr;
3484
3485                       if (insp->last != NULL || insp->next != NULL)
3486                         collate->cursor = insp;
3487                       else
3488                         {
3489                           /* This is a collating element but its position
3490                              is not yet defined.  */
3491                           lr_error (ldfile, _("\
3492 %s: order for collating element %.*s not yet defined"),
3493                                     "LC_COLLATE", (int) lenmb, startmb);
3494                           collate->cursor = NULL;
3495                           no_error = 0;
3496                         }
3497                     }
3498                   else
3499                     {
3500                       /* This is bad.  The symbol after which we have to
3501                          insert does not exist.  */
3502                       lr_error (ldfile, _("\
3503 %s: cannot reorder after %.*s: symbol not known"),
3504                                 "LC_COLLATE", (int) lenmb, startmb);
3505                       collate->cursor = NULL;
3506                       no_error = 0;
3507                     }
3508                 }
3509
3510               lr_ignore_rest (ldfile, no_error);
3511             }
3512           else
3513             /* This must not happen.  */
3514             goto err_label;
3515           break;
3516
3517         case tok_reorder_end:
3518           /* Ignore the rest of the line if we don't need the input of
3519              this line.  */
3520           if (ignore_content)
3521             break;
3522
3523           if (state != 3)
3524             goto err_label;
3525           state = 4;
3526           lr_ignore_rest (ldfile, 1);
3527           break;
3528
3529         case tok_reorder_sections_after:
3530           /* Ignore the rest of the line if we don't need the input of
3531              this line.  */
3532           if (ignore_content)
3533             {
3534               lr_ignore_rest (ldfile, 0);
3535               break;
3536             }
3537
3538           if (state == 1)
3539             {
3540               lr_error (ldfile, _("%s: missing `order_end' keyword"),
3541                         "LC_COLLATE");
3542               state = 2;
3543
3544               /* Handle ellipsis at end of list.  */
3545               if (was_ellipsis != tok_none)
3546                 {
3547                   handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3548                                    repertoire, result);
3549                   was_ellipsis = tok_none;
3550                 }
3551             }
3552           else if (state == 3)
3553             {
3554               WITH_CUR_LOCALE (error (0, 0, _("\
3555 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3556               state = 4;
3557             }
3558           else if (state != 2 && state != 4)
3559             goto err_label;
3560           state = 5;
3561
3562           /* Get the name of the sections we are adding after.  */
3563           arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3564           if (arg->tok == tok_bsymbol)
3565             {
3566               /* Now find a section with this name.  */
3567               struct section_list *runp = collate->sections;
3568
3569               while (runp != NULL)
3570                 {
3571                   if (runp->name != NULL
3572                       && strlen (runp->name) == arg->val.str.lenmb
3573                       && memcmp (runp->name, arg->val.str.startmb,
3574                                  arg->val.str.lenmb) == 0)
3575                     break;
3576
3577                   runp = runp->next;
3578                 }
3579
3580               if (runp != NULL)
3581                 collate->current_section = runp;
3582               else
3583                 {
3584                   /* This is bad.  The section after which we have to
3585                      reorder does not exist.  Therefore we cannot
3586                      process the whole rest of this reorder
3587                      specification.  */
3588                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3589                             "LC_COLLATE", (int) arg->val.str.lenmb,
3590                             arg->val.str.startmb);
3591
3592                   do
3593                     {
3594                       lr_ignore_rest (ldfile, 0);
3595
3596                       now = lr_token (ldfile, charmap, result, NULL, verbose);
3597                     }
3598                   while (now->tok == tok_reorder_sections_after
3599                          || now->tok == tok_reorder_sections_end
3600                          || now->tok == tok_end);
3601
3602                   /* Process the token we just saw.  */
3603                   nowtok = now->tok;
3604                   continue;
3605                 }
3606             }
3607           else
3608             /* This must not happen.  */
3609             goto err_label;
3610           break;
3611
3612         case tok_reorder_sections_end:
3613           /* Ignore the rest of the line if we don't need the input of
3614              this line.  */
3615           if (ignore_content)
3616             break;
3617
3618           if (state != 5)
3619             goto err_label;
3620           state = 6;
3621           lr_ignore_rest (ldfile, 1);
3622           break;
3623
3624         case tok_bsymbol:
3625         case tok_ucs4:
3626           /* Ignore the rest of the line if we don't need the input of
3627              this line.  */
3628           if (ignore_content)
3629             {
3630               lr_ignore_rest (ldfile, 0);
3631               break;
3632             }
3633
3634           if (state != 0 && state != 1 && state != 3 && state != 5)
3635             goto err_label;
3636
3637           if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3638             goto err_label;
3639
3640           if (nowtok == tok_ucs4)
3641             {
3642               snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3643               symstr = ucs4buf;
3644               symlen = 9;
3645             }
3646           else if (arg != NULL)
3647             {
3648               symstr = arg->val.str.startmb;
3649               symlen = arg->val.str.lenmb;
3650             }
3651           else
3652             {
3653               lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3654                         (int) ldfile->token.val.str.lenmb,
3655                         ldfile->token.val.str.startmb);
3656               break;
3657             }
3658
3659           struct element_t *seqp;
3660           if (state == 0)
3661             {
3662               /* We are outside an `order_start' region.  This means
3663                  we must only accept definitions of values for
3664                  collation symbols since these are purely abstract
3665                  values and don't need directions associated.  */
3666               void *ptr;
3667
3668               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3669                 {
3670                   seqp = ptr;
3671
3672                   /* It's already defined.  First check whether this
3673                      is really a collating symbol.  */
3674                   if (seqp->is_character)
3675                     goto err_label;
3676
3677                   goto move_entry;
3678                 }
3679               else
3680                 {
3681                   void *result;
3682
3683                   if (find_entry (&collate->sym_table, symstr, symlen,
3684                                   &result) != 0)
3685                     /* No collating symbol, it's an error.  */
3686                     goto err_label;
3687
3688                   /* Maybe this is the first time we define a symbol
3689                      value and it is before the first actual section.  */
3690                   if (collate->sections == NULL)
3691                     collate->sections = collate->current_section =
3692                       &collate->symbol_section;
3693                 }
3694
3695               if (was_ellipsis != tok_none)
3696                 {
3697                   handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3698                                    charmap, repertoire, result);
3699
3700                   /* Remember that we processed the ellipsis.  */
3701                   was_ellipsis = tok_none;
3702
3703                   /* And don't add the value a second time.  */
3704                   break;
3705                 }
3706             }
3707           else if (state == 3)
3708             {
3709               /* It is possible that we already have this collation sequence.
3710                  In this case we move the entry.  */
3711               void *sym;
3712               void *ptr;
3713
3714               /* If the symbol after which we have to insert was not found
3715                  ignore all entries.  */
3716               if (collate->cursor == NULL)
3717                 {
3718                   lr_ignore_rest (ldfile, 0);
3719                   break;
3720                 }
3721
3722               if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3723                 {
3724                   seqp = (struct element_t *) ptr;
3725                   goto move_entry;
3726                 }
3727
3728               if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3729                   && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3730                 goto move_entry;
3731
3732               if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3733                   && (seqp = (struct element_t *) ptr,
3734                       seqp->last != NULL || seqp->next != NULL
3735                       || (collate->start != NULL && seqp == collate->start)))
3736                 {
3737                 move_entry:
3738                   /* Remove the entry from the old position.  */
3739                   if (seqp->last == NULL)
3740                     collate->start = seqp->next;
3741                   else
3742                     seqp->last->next = seqp->next;
3743                   if (seqp->next != NULL)
3744                     seqp->next->last = seqp->last;
3745
3746                   /* We also have to check whether this entry is the
3747                      first or last of a section.  */
3748                   if (seqp->section->first == seqp)
3749                     {
3750                       if (seqp->section->first == seqp->section->last)
3751                         /* This section has no content anymore.  */
3752                         seqp->section->first = seqp->section->last = NULL;
3753                       else
3754                         seqp->section->first = seqp->next;
3755                     }
3756                   else if (seqp->section->last == seqp)
3757                     seqp->section->last = seqp->last;
3758
3759                   /* Now insert it in the new place.  */
3760                   insert_weights (ldfile, seqp, charmap, repertoire, result,
3761                                   tok_none);
3762                   break;
3763                 }
3764
3765               /* Otherwise we just add a new entry.  */
3766             }
3767           else if (state == 5)
3768             {
3769               /* We are reordering sections.  Find the named section.  */
3770               struct section_list *runp = collate->sections;
3771               struct section_list *prevp = NULL;
3772
3773               while (runp != NULL)
3774                 {
3775                   if (runp->name != NULL
3776                       && strlen (runp->name) == symlen
3777                       && memcmp (runp->name, symstr, symlen) == 0)
3778                     break;
3779
3780                   prevp = runp;
3781                   runp = runp->next;
3782                 }
3783
3784               if (runp == NULL)
3785                 {
3786                   lr_error (ldfile, _("%s: section `%.*s' not known"),
3787                             "LC_COLLATE", (int) symlen, symstr);
3788                   lr_ignore_rest (ldfile, 0);
3789                 }
3790               else
3791                 {
3792                   if (runp != collate->current_section)
3793                     {
3794                       /* Remove the named section from the old place and
3795                          insert it in the new one.  */
3796                       prevp->next = runp->next;
3797
3798                       runp->next = collate->current_section->next;
3799                       collate->current_section->next = runp;
3800                       collate->current_section = runp;
3801                     }
3802
3803                   /* Process the rest of the line which might change
3804                      the collation rules.  */
3805                   arg = lr_token (ldfile, charmap, result, repertoire,
3806                                   verbose);
3807                   if (arg->tok != tok_eof && arg->tok != tok_eol)
3808                     read_directions (ldfile, arg, charmap, repertoire,
3809                                      result);
3810                 }
3811               break;
3812             }
3813           else if (was_ellipsis != tok_none)
3814             {
3815               /* Using the information in the `ellipsis_weight'
3816                  element and this and the last value we have to handle
3817                  the ellipsis now.  */
3818               assert (state == 1);
3819
3820               handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3821                                repertoire, result);
3822
3823               /* Remember that we processed the ellipsis.  */
3824               was_ellipsis = tok_none;
3825
3826               /* And don't add the value a second time.  */
3827               break;
3828             }
3829
3830           /* Now insert in the new place.  */
3831           insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3832           break;
3833
3834         case tok_undefined:
3835           /* Ignore the rest of the line if we don't need the input of
3836              this line.  */
3837           if (ignore_content)
3838             {
3839               lr_ignore_rest (ldfile, 0);
3840               break;
3841             }
3842
3843           if (state != 1)
3844             goto err_label;
3845
3846           if (was_ellipsis != tok_none)
3847             {
3848               lr_error (ldfile,
3849                         _("%s: cannot have `%s' as end of ellipsis range"),
3850                         "LC_COLLATE", "UNDEFINED");
3851
3852               unlink_element (collate);
3853               was_ellipsis = tok_none;
3854             }
3855
3856           /* See whether UNDEFINED already appeared somewhere.  */
3857           if (collate->undefined.next != NULL
3858               || &collate->undefined == collate->cursor)
3859             {
3860               lr_error (ldfile,
3861                         _("%s: order for `%.*s' already defined at %s:%Zu"),
3862                         "LC_COLLATE", 9, "UNDEFINED",
3863                         collate->undefined.file,
3864                         collate->undefined.line);
3865               lr_ignore_rest (ldfile, 0);
3866             }
3867           else
3868             /* Parse the weights.  */
3869              insert_weights (ldfile, &collate->undefined, charmap,
3870                              repertoire, result, tok_none);
3871           break;
3872
3873         case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3874         case tok_ellipsis3: /* absolute ellipsis */
3875         case tok_ellipsis4: /* symbolic decimal ellipsis */
3876           /* This is the symbolic (decimal or hexadecimal) or absolute
3877              ellipsis.  */
3878           if (was_ellipsis != tok_none)
3879             goto err_label;
3880
3881           if (state != 0 && state != 1 && state != 3)
3882             goto err_label;
3883
3884           was_ellipsis = nowtok;
3885
3886           insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3887                           repertoire, result, nowtok);
3888           break;
3889
3890         case tok_end:
3891         seen_end:
3892           /* Next we assume `LC_COLLATE'.  */
3893           if (!ignore_content)
3894             {
3895               if (state == 0 && copy_locale == NULL)
3896                 /* We must either see a copy statement or have
3897                    ordering values.  */
3898                 lr_error (ldfile,
3899                           _("%s: empty category description not allowed"),
3900                           "LC_COLLATE");
3901               else if (state == 1)
3902                 {
3903                   lr_error (ldfile, _("%s: missing `order_end' keyword"),
3904                             "LC_COLLATE");
3905
3906                   /* Handle ellipsis at end of list.  */
3907                   if (was_ellipsis != tok_none)
3908                     {
3909                       handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3910                                        repertoire, result);
3911                       was_ellipsis = tok_none;
3912                     }
3913                 }
3914               else if (state == 3)
3915                 WITH_CUR_LOCALE (error (0, 0, _("\
3916 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3917               else if (state == 5)
3918                 WITH_CUR_LOCALE (error (0, 0, _("\
3919 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3920             }
3921           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3922           if (arg->tok == tok_eof)
3923             break;
3924           if (arg->tok == tok_eol)
3925             lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3926           else if (arg->tok != tok_lc_collate)
3927             lr_error (ldfile, _("\
3928 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3929           lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3930           return;
3931
3932         case tok_define:
3933           if (ignore_content)
3934             {
3935               lr_ignore_rest (ldfile, 0);
3936               break;
3937             }
3938
3939           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3940           if (arg->tok != tok_ident)
3941             goto err_label;
3942
3943           /* Simply add the new symbol.  */
3944           struct name_list *newsym = xmalloc (sizeof (*newsym)
3945                                               + arg->val.str.lenmb + 1);
3946           memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3947           newsym->str[arg->val.str.lenmb] = '\0';
3948           newsym->next = defined;
3949           defined = newsym;
3950
3951           lr_ignore_rest (ldfile, 1);
3952           break;
3953
3954         case tok_undef:
3955           if (ignore_content)
3956             {
3957               lr_ignore_rest (ldfile, 0);
3958               break;
3959             }
3960
3961           arg = lr_token (ldfile, charmap, result, NULL, verbose);
3962           if (arg->tok != tok_ident)
3963             goto err_label;
3964
3965           /* Remove _all_ occurrences of the symbol from the list.  */
3966           struct name_list *prevdef = NULL;
3967           struct name_list *curdef = defined;
3968           while (curdef != NULL)
3969             if (strncmp (arg->val.str.startmb, curdef->str,
3970                          arg->val.str.lenmb) == 0
3971                 && curdef->str[arg->val.str.lenmb] == '\0')
3972               {
3973                 if (prevdef == NULL)
3974                   defined = curdef->next;
3975                 else
3976                   prevdef->next = curdef->next;
3977
3978                 struct name_list *olddef = curdef;
3979                 curdef = curdef->next;
3980
3981                 free (olddef);
3982               }
3983             else
3984               {
3985                 prevdef = curdef;
3986                 curdef = curdef->next;
3987               }
3988
3989           lr_ignore_rest (ldfile, 1);
3990           break;
3991
3992         case tok_ifdef:
3993         case tok_ifndef:
3994           if (ignore_content)
3995             {
3996               lr_ignore_rest (ldfile, 0);
3997               break;
3998             }
3999
4000         found_ifdef:
4001           arg = lr_token (ldfile, charmap, result, NULL, verbose);
4002           if (arg->tok != tok_ident)
4003             goto err_label;
4004           lr_ignore_rest (ldfile, 1);
4005
4006           if (collate->else_action == else_none)
4007             {
4008               curdef = defined;
4009               while (curdef != NULL)
4010                 if (strncmp (arg->val.str.startmb, curdef->str,
4011                              arg->val.str.lenmb) == 0
4012                     && curdef->str[arg->val.str.lenmb] == '\0')
4013                   break;
4014                 else
4015                   curdef = curdef->next;
4016
4017               if ((nowtok == tok_ifdef && curdef != NULL)
4018                   || (nowtok == tok_ifndef && curdef == NULL))
4019                 {
4020                   /* We have to use the if-branch.  */
4021                   collate->else_action = else_ignore;
4022                 }
4023               else
4024                 {
4025                   /* We have to use the else-branch, if there is one.  */
4026                   nowtok = skip_to (ldfile, collate, charmap, 0);
4027                   if (nowtok == tok_else)
4028                     collate->else_action = else_seen;
4029                   else if (nowtok == tok_elifdef)
4030                     {
4031                       nowtok = tok_ifdef;
4032                       goto found_ifdef;
4033                     }
4034                   else if (nowtok == tok_elifndef)
4035                     {
4036                       nowtok = tok_ifndef;
4037                       goto found_ifdef;
4038                     }
4039                   else if (nowtok == tok_eof)
4040                     goto seen_eof;
4041                   else if (nowtok == tok_end)
4042                     goto seen_end;
4043                 }
4044             }
4045           else
4046             {
4047               /* XXX Should it really become necessary to support nested
4048                  preprocessor handling we will push the state here.  */
4049               lr_error (ldfile, _("%s: nested conditionals not supported"),
4050                         "LC_COLLATE");
4051               nowtok = skip_to (ldfile, collate, charmap, 1);
4052               if (nowtok == tok_eof)
4053                 goto seen_eof;
4054               else if (nowtok == tok_end)
4055                 goto seen_end;
4056             }
4057           break;
4058
4059         case tok_elifdef:
4060         case tok_elifndef:
4061         case tok_else:
4062           if (ignore_content)
4063             {
4064               lr_ignore_rest (ldfile, 0);
4065               break;
4066             }
4067
4068           lr_ignore_rest (ldfile, 1);
4069
4070           if (collate->else_action == else_ignore)
4071             {
4072               /* Ignore everything until the endif.  */
4073               nowtok = skip_to (ldfile, collate, charmap, 1);
4074               if (nowtok == tok_eof)
4075                 goto seen_eof;
4076               else if (nowtok == tok_end)
4077                 goto seen_end;
4078             }
4079           else
4080             {
4081               assert (collate->else_action == else_none);
4082               lr_error (ldfile, _("\
4083 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
4084                         nowtok == tok_else ? "else"
4085                         : nowtok == tok_elifdef ? "elifdef" : "elifndef");
4086             }
4087           break;
4088
4089         case tok_endif:
4090           if (ignore_content)
4091             {
4092               lr_ignore_rest (ldfile, 0);
4093               break;
4094             }
4095
4096           lr_ignore_rest (ldfile, 1);
4097
4098           if (collate->else_action != else_ignore
4099               && collate->else_action != else_seen)
4100             lr_error (ldfile, _("\
4101 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
4102
4103           /* XXX If we support nested preprocessor directives we pop
4104              the state here.  */
4105           collate->else_action = else_none;
4106           break;
4107
4108         default:
4109         err_label:
4110           SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
4111         }
4112
4113       /* Prepare for the next round.  */
4114       now = lr_token (ldfile, charmap, result, NULL, verbose);
4115       nowtok = now->tok;
4116     }
4117
4118  seen_eof:
4119   /* When we come here we reached the end of the file.  */
4120   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
4121 }