locale/programs/linereader.c

   1 /* Copyright (C) 1996-2005, 2006 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License version 2 as
   7    published by the Free Software Foundation.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <errno.h>
  25 #include <libintl.h>
  26 #include <stdarg.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29
  30 #include "localedef.h"
  31 #include "charmap.h"
  32 #include "error.h"
  33 #include "linereader.h"
  34 #include "locfile.h"
  35
  36 /* Prototypes for local functions.  */
  37 static struct token *get_toplvl_escape (struct linereader *lr);
  38 static struct token *get_symname (struct linereader *lr);
  39 static struct token *get_ident (struct linereader *lr);
  40 static struct token *get_string (struct linereader *lr,
  41                                  const struct charmap_t *charmap,
  42                                  struct localedef_t *locale,
  43                                  const struct repertoire_t *repertoire,
  44                                  int verbose);
  45
  46
  47 struct linereader *
  48 lr_open (const char *fname, kw_hash_fct_t hf)
  49 {
  50   FILE *fp;
  51
  52   if (fname == NULL || strcmp (fname, "-") == 0
  53       || strcmp (fname, "/dev/stdin") == 0)
  54     return lr_create (stdin, "<stdin>", hf);
  55   else
  56     {
  57       fp = fopen (fname, "rm");
  58       if (fp == NULL)
  59         return NULL;
  60       return lr_create (fp, fname, hf);
  61     }
  62 }
  63
  64 struct linereader *
  65 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  66 {
  67   struct linereader *result;
  68   int n;
  69
  70   result = (struct linereader *) xmalloc (sizeof (*result));
  71
  72   result->fp = fp;
  73   result->fname = xstrdup (fname);
  74   result->buf = NULL;
  75   result->bufsize = 0;
  76   result->lineno = 1;
  77   result->idx = 0;
  78   result->comment_char = '#';
  79   result->escape_char = '\\';
  80   result->translate_strings = 1;
  81   result->return_widestr = 0;
  82
  83   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  84   if (n < 0)
  85     {
  86       int save = errno;
  87       fclose (result->fp);
  88       free ((char *) result->fname);
  89       free (result);
  90       errno = save;
  91       return NULL;
  92     }
  93
  94   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  95     n -= 2;
  96
  97   result->buf[n] = '\0';
  98   result->bufact = n;
  99   result->hash_fct = hf;
 100
 101   return result;
 102 }
 103
 104
 105 int
 106 lr_eof (struct linereader *lr)
 107 {
 108   return lr->bufact = 0;
 109 }
 110
 111
 112 void
 113 lr_ignore_rest (struct linereader *lr, int verbose)
 114 {
 115   if (verbose)
 116     {
 117       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
 118              && lr->buf[lr->idx] != lr->comment_char)
 119         if (lr->buf[lr->idx] == '\0')
 120           {
 121             if (lr_next (lr) < 0)
 122               return;
 123           }
 124         else
 125           ++lr->idx;
 126
 127       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
 128           && lr->buf[lr->idx] != lr->comment_char)
 129         lr_error (lr, _("trailing garbage at end of line"));
 130     }
 131
 132   /* Ignore continued line.  */
 133   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
 134     if (lr_next (lr) < 0)
 135       break;
 136
 137   lr->idx = lr->bufact;
 138 }
 139
 140
 141 void
 142 lr_close (struct linereader *lr)
 143 {
 144   fclose (lr->fp);
 145   free (lr->buf);
 146   free (lr);
 147 }
 148
 149
 150 int
 151 lr_next (struct linereader *lr)
 152 {
 153   int n;
 154
 155   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 156   if (n < 0)
 157     return -1;
 158
 159   ++lr->lineno;
 160
 161   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 162     {
 163 #if 0
 164       /* XXX Is this correct?  */
 165       /* An escaped newline character is substituted with a single <SP>.  */
 166       --n;
 167       lr->buf[n - 1] = ' ';
 168 #else
 169       n -= 2;
 170 #endif
 171     }
 172
 173   lr->buf[n] = '\0';
 174   lr->bufact = n;
 175   lr->idx = 0;
 176
 177   return 0;
 178 }
 179
 180
 181 /* Defined in error.c.  */
 182 /* This variable is incremented each time `error' is called.  */
 183 extern unsigned int error_message_count;
 184
 185 /* The calling program should define program_name and set it to the
 186    name of the executing program.  */
 187 extern char *program_name;
 188
 189
 190 struct token *
 191 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 192           struct localedef_t *locale, const struct repertoire_t *repertoire,
 193           int verbose)
 194 {
 195   int ch;
 196
 197   while (1)
 198     {
 199       do
 200         {
 201           ch = lr_getc (lr);
 202
 203           if (ch == EOF)
 204             {
 205               lr->token.tok = tok_eof;
 206               return &lr->token;
 207             };
 208
 209           if (ch == '\n')
 210             {
 211               lr->token.tok = tok_eol;
 212               return &lr->token;
 213             }
 214         }
 215       while (isspace (ch));
 216
 217       if (ch != lr->comment_char)
 218         break;
 219
 220       /* Is there an newline at the end of the buffer?  */
 221       if (lr->buf[lr->bufact - 1] != '\n')
 222         {
 223           /* No.  Some people want this to mean that only the line in
 224              the file not the logical, concatenated line is ignored.
 225              Let's try this.  */
 226           lr->idx = lr->bufact;
 227           continue;
 228         }
 229
 230       /* Ignore rest of line.  */
 231       lr_ignore_rest (lr, 0);
 232       lr->token.tok = tok_eol;
 233       return &lr->token;
 234     }
 235
 236   /* Match escape sequences.  */
 237   if (ch == lr->escape_char)
 238     return get_toplvl_escape (lr);
 239
 240   /* Match ellipsis.  */
 241   if (ch == '.')
 242     {
 243       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 244         {
 245           int cnt;
 246           for (cnt = 0; cnt < 10; ++cnt)
 247             lr_getc (lr);
 248           lr->token.tok = tok_ellipsis4_2;
 249           return &lr->token;
 250         }
 251       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 252         {
 253           lr_getc (lr);
 254           lr_getc (lr);
 255           lr_getc (lr);
 256           lr->token.tok = tok_ellipsis4;
 257           return &lr->token;
 258         }
 259       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 260         {
 261           lr_getc (lr);
 262           lr_getc (lr);
 263           lr->token.tok = tok_ellipsis3;
 264           return &lr->token;
 265         }
 266       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 267         {
 268           int cnt;
 269           for (cnt = 0; cnt < 6; ++cnt)
 270             lr_getc (lr);
 271           lr->token.tok = tok_ellipsis2_2;
 272           return &lr->token;
 273         }
 274       if (lr->buf[lr->idx] == '.')
 275         {
 276           lr_getc (lr);
 277           lr->token.tok = tok_ellipsis2;
 278           return &lr->token;
 279         }
 280     }
 281
 282   switch (ch)
 283     {
 284     case '<':
 285       return get_symname (lr);
 286
 287     case '0' ... '9':
 288       lr->token.tok = tok_number;
 289       lr->token.val.num = ch - '0';
 290
 291       while (isdigit (ch = lr_getc (lr)))
 292         {
 293           lr->token.val.num *= 10;
 294           lr->token.val.num += ch - '0';
 295         }
 296       if (isalpha (ch))
 297         lr_error (lr, _("garbage at end of number"));
 298       lr_ungetn (lr, 1);
 299
 300       return &lr->token;
 301
 302     case ';':
 303       lr->token.tok = tok_semicolon;
 304       return &lr->token;
 305
 306     case ',':
 307       lr->token.tok = tok_comma;
 308       return &lr->token;
 309
 310     case '(':
 311       lr->token.tok = tok_open_brace;
 312       return &lr->token;
 313
 314     case ')':
 315       lr->token.tok = tok_close_brace;
 316       return &lr->token;
 317
 318     case '"':
 319       return get_string (lr, charmap, locale, repertoire, verbose);
 320
 321     case '-':
 322       ch = lr_getc (lr);
 323       if (ch == '1')
 324         {
 325           lr->token.tok = tok_minus1;
 326           return &lr->token;
 327         }
 328       lr_ungetn (lr, 2);
 329       break;
 330     }
 331
 332   return get_ident (lr);
 333 }
 334
 335
 336 static struct token *
 337 get_toplvl_escape (struct linereader *lr)
 338 {
 339   /* This is supposed to be a numeric value.  We return the
 340      numerical value and the number of bytes.  */
 341   size_t start_idx = lr->idx - 1;
 342   unsigned char *bytes = lr->token.val.charcode.bytes;
 343   size_t nbytes = 0;
 344   int ch;
 345
 346   do
 347     {
 348       unsigned int byte = 0;
 349       unsigned int base = 8;
 350
 351       ch = lr_getc (lr);
 352
 353       if (ch == 'd')
 354         {
 355           base = 10;
 356           ch = lr_getc (lr);
 357         }
 358       else if (ch == 'x')
 359         {
 360           base = 16;
 361           ch = lr_getc (lr);
 362         }
 363
 364       if ((base == 16 && !isxdigit (ch))
 365           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 366         {
 367         esc_error:
 368           lr->token.val.str.startmb = &lr->buf[start_idx];
 369
 370           while (ch != EOF && !isspace (ch))
 371             ch = lr_getc (lr);
 372           lr->token.val.str.lenmb = lr->idx - start_idx;
 373
 374           lr->token.tok = tok_error;
 375           return &lr->token;
 376         }
 377
 378       if (isdigit (ch))
 379         byte = ch - '0';
 380       else
 381         byte = tolower (ch) - 'a' + 10;
 382
 383       ch = lr_getc (lr);
 384       if ((base == 16 && !isxdigit (ch))
 385           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 386         goto esc_error;
 387
 388       byte *= base;
 389       if (isdigit (ch))
 390         byte += ch - '0';
 391       else
 392         byte += tolower (ch) - 'a' + 10;
 393
 394       ch = lr_getc (lr);
 395       if (base != 16 && isdigit (ch))
 396         {
 397           byte *= base;
 398           byte += ch - '0';
 399
 400           ch = lr_getc (lr);
 401         }
 402
 403       bytes[nbytes++] = byte;
 404     }
 405   while (ch == lr->escape_char
 406          && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
 407
 408   if (!isspace (ch))
 409     lr_error (lr, _("garbage at end of character code specification"));
 410
 411   lr_ungetn (lr, 1);
 412
 413   lr->token.tok = tok_charcode;
 414   lr->token.val.charcode.nbytes = nbytes;
 415
 416   return &lr->token;
 417 }
 418
 419
 420 #define ADDC(ch) \
 421   do                                                                          \
 422     {                                                                         \
 423       if (bufact == bufmax)                                                   \
 424         {                                                                     \
 425           bufmax *= 2;                                                        \
 426           buf = xrealloc (buf, bufmax);                                       \
 427         }                                                                     \
 428       buf[bufact++] = (ch);                                                   \
 429     }                                                                         \
 430   while (0)
 431
 432
 433 #define ADDS(s, l) \
 434   do                                                                          \
 435     {                                                                         \
 436       size_t _l = (l);                                                        \
 437       if (bufact + _l > bufmax)                                               \
 438         {                                                                     \
 439           if (bufact < _l)                                                    \
 440             bufact = _l;                                                      \
 441           bufmax *= 2;                                                        \
 442           buf = xrealloc (buf, bufmax);                                       \
 443         }                                                                     \
 444       memcpy (&buf[bufact], s, _l);                                           \
 445       bufact += _l;                                                           \
 446     }                                                                         \
 447   while (0)
 448
 449
 450 #define ADDWC(ch) \
 451   do                                                                          \
 452     {                                                                         \
 453       if (buf2act == buf2max)                                                 \
 454         {                                                                     \
 455           buf2max *= 2;                                                       \
 456           buf2 = xrealloc (buf2, buf2max * 4);                                \
 457         }                                                                     \
 458       buf2[buf2act++] = (ch);                                                 \
 459     }                                                                         \
 460   while (0)
 461
 462
 463 static struct token *
 464 get_symname (struct linereader *lr)
 465 {
 466   /* Symbol in brackets.  We must distinguish three kinds:
 467      1. reserved words
 468      2. ISO 10646 position values
 469      3. all other.  */
 470   char *buf;
 471   size_t bufact = 0;
 472   size_t bufmax = 56;
 473   const struct keyword_t *kw;
 474   int ch;
 475
 476   buf = (char *) xmalloc (bufmax);
 477
 478   do
 479     {
 480       ch = lr_getc (lr);
 481       if (ch == lr->escape_char)
 482         {
 483           int c2 = lr_getc (lr);
 484           ADDC (c2);
 485
 486           if (c2 == '\n')
 487             ch = '\n';
 488         }
 489       else
 490         ADDC (ch);
 491     }
 492   while (ch != '>' && ch != '\n');
 493
 494   if (ch == '\n')
 495     lr_error (lr, _("unterminated symbolic name"));
 496
 497   /* Test for ISO 10646 position value.  */
 498   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 499     {
 500       char *cp = buf + 1;
 501       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 502         ++cp;
 503
 504       if (cp == &buf[bufact - 1])
 505         {
 506           /* Yes, it is.  */
 507           lr->token.tok = tok_ucs4;
 508           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 509
 510           return &lr->token;
 511         }
 512     }
 513
 514   /* It is a symbolic name.  Test for reserved words.  */
 515   kw = lr->hash_fct (buf, bufact - 1);
 516
 517   if (kw != NULL && kw->symname_or_ident == 1)
 518     {
 519       lr->token.tok = kw->token;
 520       free (buf);
 521     }
 522   else
 523     {
 524       lr->token.tok = tok_bsymbol;
 525
 526       buf[bufact] = '\0';
 527       buf = xrealloc (buf, bufact + 1);
 528
 529       lr->token.val.str.startmb = buf;
 530       lr->token.val.str.lenmb = bufact - 1;
 531     }
 532
 533   return &lr->token;
 534 }
 535
 536
 537 static struct token *
 538 get_ident (struct linereader *lr)
 539 {
 540   char *buf;
 541   size_t bufact;
 542   size_t bufmax = 56;
 543   const struct keyword_t *kw;
 544   int ch;
 545
 546   buf = xmalloc (bufmax);
 547   bufact = 0;
 548
 549   ADDC (lr->buf[lr->idx - 1]);
 550
 551   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 552          && ch != '<' && ch != ',' && ch != EOF)
 553     {
 554       if (ch == lr->escape_char)
 555         {
 556           ch = lr_getc (lr);
 557           if (ch == '\n' || ch == EOF)
 558             {
 559               lr_error (lr, _("invalid escape sequence"));
 560               break;
 561             }
 562         }
 563       ADDC (ch);
 564     }
 565
 566   lr_ungetc (lr, ch);
 567
 568   kw = lr->hash_fct (buf, bufact);
 569
 570   if (kw != NULL && kw->symname_or_ident == 0)
 571     {
 572       lr->token.tok = kw->token;
 573       free (buf);
 574     }
 575   else
 576     {
 577       lr->token.tok = tok_ident;
 578
 579       buf[bufact] = '\0';
 580       buf = xrealloc (buf, bufact + 1);
 581
 582       lr->token.val.str.startmb = buf;
 583       lr->token.val.str.lenmb = bufact;
 584     }
 585
 586   return &lr->token;
 587 }
 588
 589
 590 static struct token *
 591 get_string (struct linereader *lr, const struct charmap_t *charmap,
 592             struct localedef_t *locale, const struct repertoire_t *repertoire,
 593             int verbose)
 594 {
 595   int return_widestr = lr->return_widestr;
 596   char *buf;
 597   wchar_t *buf2 = NULL;
 598   size_t bufact;
 599   size_t bufmax = 56;
 600
 601   /* We must return two different strings.  */
 602   buf = xmalloc (bufmax);
 603   bufact = 0;
 604
 605   /* We know it'll be a string.  */
 606   lr->token.tok = tok_string;
 607
 608   /* If we need not translate the strings (i.e., expand <...> parts)
 609      we can run a simple loop.  */
 610   if (!lr->translate_strings)
 611     {
 612       int ch;
 613
 614       buf2 = NULL;
 615       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 616         ADDC (ch);
 617
 618       /* Catch errors with trailing escape character.  */
 619       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 620           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 621         {
 622           lr_error (lr, _("illegal escape sequence at end of string"));
 623           --bufact;
 624         }
 625       else if (ch == '\n' || ch == EOF)
 626         lr_error (lr, _("unterminated string"));
 627
 628       ADDC ('\0');
 629     }
 630   else
 631     {
 632       int illegal_string = 0;
 633       size_t buf2act = 0;
 634       size_t buf2max = 56 * sizeof (uint32_t);
 635       int ch;
 636       int warned = 0;
 637
 638       /* We have to provide the wide character result as well.  */
 639       if (return_widestr)
 640         buf2 = xmalloc (buf2max);
 641
 642       /* Read until the end of the string (or end of the line or file).  */
 643       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 644         {
 645           size_t startidx;
 646           uint32_t wch;
 647           struct charseq *seq;
 648
 649           if (ch != '<')
 650             {
 651               /* The standards leave it up to the implementation to decide
 652                  what to do with character which stand for themself.  We
 653                  could jump through hoops to find out the value relative to
 654                  the charmap and the repertoire map, but instead we leave
 655                  it up to the locale definition author to write a better
 656                  definition.  We assume here that every character which
 657                  stands for itself is encoded using ISO 8859-1.  Using the
 658                  escape character is allowed.  */
 659               if (ch == lr->escape_char)
 660                 {
 661                   ch = lr_getc (lr);
 662                   if (ch == '\n' || ch == EOF)
 663                     break;
 664                 }
 665
 666               if (verbose && !warned)
 667                 {
 668                   lr_error (lr, _("\
 669 non-symbolic character value should not be used"));
 670                   warned = 1;
 671                 }
 672
 673               ADDC (ch);
 674               if (return_widestr)
 675                 ADDWC ((uint32_t) ch);
 676
 677               continue;
 678             }
 679
 680           /* Now we have to search for the end of the symbolic name, i.e.,
 681              the closing '>'.  */
 682           startidx = bufact;
 683           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 684             {
 685               if (ch == lr->escape_char)
 686                 {
 687                   ch = lr_getc (lr);
 688                   if (ch == '\n' || ch == EOF)
 689                     break;
 690                 }
 691               ADDC (ch);
 692             }
 693           if (ch == '\n' || ch == EOF)
 694             /* Not a correct string.  */
 695             break;
 696           if (bufact == startidx)
 697             {
 698               /* <> is no correct name.  Ignore it and also signal an
 699                  error.  */
 700               illegal_string = 1;
 701               continue;
 702             }
 703
 704           /* It might be a Uxxxx symbol.  */
 705           if (buf[startidx] == 'U'
 706               && (bufact - startidx == 5 || bufact - startidx == 9))
 707             {
 708               char *cp = buf + startidx + 1;
 709               while (cp < &buf[bufact] && isxdigit (*cp))
 710                 ++cp;
 711
 712               if (cp == &buf[bufact])
 713                 {
 714                   char utmp[10];
 715
 716                   /* Yes, it is.  */
 717                   ADDC ('\0');
 718                   wch = strtoul (buf + startidx + 1, NULL, 16);
 719
 720                   /* Now forget about the name we just added.  */
 721                   bufact = startidx;
 722
 723                   if (return_widestr)
 724                     ADDWC (wch);
 725
 726                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 727                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 728                   seq = charmap_find_value (charmap, utmp, 9);
 729
 730                   if (seq == NULL)
 731                     {
 732                      /* No, this isn't the case.  Now determine from
 733                         the repertoire the name of the character and
 734                         find it in the charmap.  */
 735                       if (repertoire != NULL)
 736                         {
 737                           const char *symbol;
 738
 739                           symbol = repertoire_find_symbol (repertoire, wch);
 740
 741                           if (symbol != NULL)
 742                             seq = charmap_find_value (charmap, symbol,
 743                                                       strlen (symbol));
 744                         }
 745
 746                       if (seq == NULL)
 747                         {
 748 #ifndef NO_TRANSLITERATION
 749                           /* Transliterate if possible.  */
 750                           if (locale != NULL)
 751                             {
 752                               uint32_t *translit;
 753
 754                               if ((locale->avail & CTYPE_LOCALE) == 0)
 755                                 {
 756                                   /* Load the CTYPE data now.  */
 757                                   int old_needed = locale->needed;
 758
 759                                   locale->needed = 0;
 760                                   locale = load_locale (LC_CTYPE,
 761                                                         locale->name,
 762                                                         locale->repertoire_name,
 763                                                         charmap, locale);
 764                                   locale->needed = old_needed;
 765                                 }
 766
 767                               if ((locale->avail & CTYPE_LOCALE) != 0
 768                                   && ((translit = find_translit (locale,
 769                                                                  charmap, wch))
 770                                       != NULL))
 771                                 /* The CTYPE data contains a matching
 772                                    transliteration.  */
 773                                 {
 774                                   int i;
 775
 776                                   for (i = 0; translit[i] != 0; ++i)
 777                                     {
 778                                       char utmp[10];
 779
 780                                       snprintf (utmp, sizeof (utmp), "U%08X",
 781                                                 translit[i]);
 782                                       seq = charmap_find_value (charmap, utmp,
 783                                                                 9);
 784                                       assert (seq != NULL);
 785                                       ADDS (seq->bytes, seq->nbytes);
 786                                     }
 787
 788                                   continue;
 789                                 }
 790                             }
 791 #endif  /* NO_TRANSLITERATION */
 792
 793                           /* Not a known name.  */
 794                           illegal_string = 1;
 795                         }
 796                     }
 797
 798                   if (seq != NULL)
 799                     ADDS (seq->bytes, seq->nbytes);
 800
 801                   continue;
 802                 }
 803             }
 804
 805           /* We now have the symbolic name in buf[startidx] to
 806              buf[bufact-1].  Now find out the value for this character
 807              in the charmap as well as in the repertoire map (in this
 808              order).  */
 809           seq = charmap_find_value (charmap, &buf[startidx],
 810                                     bufact - startidx);
 811
 812           if (seq == NULL)
 813             {
 814               /* This name is not in the charmap.  */
 815               lr_error (lr, _("symbol `%.*s' not in charmap"),
 816                         (int) (bufact - startidx), &buf[startidx]);
 817               illegal_string = 1;
 818             }
 819
 820           if (return_widestr)
 821             {
 822               /* Now the same for the multibyte representation.  */
 823               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 824                 wch = seq->ucs4;
 825               else
 826                 {
 827                   wch = repertoire_find_value (repertoire, &buf[startidx],
 828                                                bufact - startidx);
 829                   if (seq != NULL)
 830                     seq->ucs4 = wch;
 831                 }
 832
 833               if (wch == ILLEGAL_CHAR_VALUE)
 834                 {
 835                   /* This name is not in the repertoire map.  */
 836                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 837                             (int) (bufact - startidx), &buf[startidx]);
 838                   illegal_string = 1;
 839                 }
 840               else
 841                 ADDWC (wch);
 842             }
 843
 844           /* Now forget about the name we just added.  */
 845           bufact = startidx;
 846
 847           /* And copy the bytes.  */
 848           if (seq != NULL)
 849             ADDS (seq->bytes, seq->nbytes);
 850         }
 851
 852       if (ch == '\n' || ch == EOF)
 853         {
 854           lr_error (lr, _("unterminated string"));
 855           illegal_string = 1;
 856         }
 857
 858       if (illegal_string)
 859         {
 860           free (buf);
 861           if (buf2 != NULL)
 862             free (buf2);
 863           lr->token.val.str.startmb = NULL;
 864           lr->token.val.str.lenmb = 0;
 865           lr->token.val.str.startwc = NULL;
 866           lr->token.val.str.lenwc = 0;
 867
 868           return &lr->token;
 869         }
 870
 871       ADDC ('\0');
 872
 873       if (return_widestr)
 874         {
 875           ADDWC (0);
 876           lr->token.val.str.startwc = xrealloc (buf2,
 877                                                 buf2act * sizeof (uint32_t));
 878           lr->token.val.str.lenwc = buf2act;
 879         }
 880     }
 881
 882   lr->token.val.str.startmb = xrealloc (buf, bufact);
 883   lr->token.val.str.lenmb = bufact;
 884
 885   return &lr->token;
 886 }