locale/programs/linereader.c

   1 /* Copyright (C) 1996-2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <errno.h>
  27 #include <libintl.h>
  28 #include <stdarg.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #include "localedef.h"
  33 #include "charmap.h"
  34 #include "error.h"
  35 #include "linereader.h"
  36 #include "locfile.h"
  37
  38 /* Prototypes for local functions.  */
  39 static struct token *get_toplvl_escape (struct linereader *lr);
  40 static struct token *get_symname (struct linereader *lr);
  41 static struct token *get_ident (struct linereader *lr);
  42 static struct token *get_string (struct linereader *lr,
  43                                  const struct charmap_t *charmap,
  44                                  struct localedef_t *locale,
  45                                  const struct repertoire_t *repertoire,
  46                                  int verbose);
  47
  48
  49 struct linereader *
  50 lr_open (const char *fname, kw_hash_fct_t hf)
  51 {
  52   FILE *fp;
  53
  54   if (fname == NULL || strcmp (fname, "-") == 0
  55       || strcmp (fname, "/dev/stdin") == 0)
  56     return lr_create (stdin, "<stdin>", hf);
  57   else
  58     {
  59       fp = fopen (fname, "rm");
  60       if (fp == NULL)
  61         return NULL;
  62       return lr_create (fp, fname, hf);
  63     }
  64 }
  65
  66 struct linereader *
  67 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  68 {
  69   struct linereader *result;
  70   int n;
  71
  72   result = (struct linereader *) xmalloc (sizeof (*result));
  73
  74   result->fp = fp;
  75   result->fname = xstrdup (fname);
  76   result->buf = NULL;
  77   result->bufsize = 0;
  78   result->lineno = 1;
  79   result->idx = 0;
  80   result->comment_char = '#';
  81   result->escape_char = '\\';
  82   result->translate_strings = 1;
  83   result->return_widestr = 0;
  84
  85   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  86   if (n < 0)
  87     {
  88       int save = errno;
  89       fclose (result->fp);
  90       free ((char *) result->fname);
  91       free (result);
  92       errno = save;
  93       return NULL;
  94     }
  95
  96   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  97     n -= 2;
  98
  99   result->buf[n] = '\0';
 100   result->bufact = n;
 101   result->hash_fct = hf;
 102
 103   return result;
 104 }
 105
 106
 107 int
 108 lr_eof (struct linereader *lr)
 109 {
 110   return lr->bufact = 0;
 111 }
 112
 113
 114 void
 115 lr_ignore_rest (struct linereader *lr, int verbose)
 116 {
 117   if (verbose)
 118     {
 119       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
 120              && lr->buf[lr->idx] != lr->comment_char)
 121         if (lr->buf[lr->idx] == '\0')
 122           {
 123             if (lr_next (lr) < 0)
 124               return;
 125           }
 126         else
 127           ++lr->idx;
 128
 129       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
 130           && lr->buf[lr->idx] != lr->comment_char)
 131         lr_error (lr, _("trailing garbage at end of line"));
 132     }
 133
 134   /* Ignore continued line.  */
 135   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
 136     if (lr_next (lr) < 0)
 137       break;
 138
 139   lr->idx = lr->bufact;
 140 }
 141
 142
 143 void
 144 lr_close (struct linereader *lr)
 145 {
 146   fclose (lr->fp);
 147   free (lr->buf);
 148   free (lr);
 149 }
 150
 151
 152 int
 153 lr_next (struct linereader *lr)
 154 {
 155   int n;
 156
 157   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 158   if (n < 0)
 159     return -1;
 160
 161   ++lr->lineno;
 162
 163   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 164     {
 165 #if 0
 166       /* XXX Is this correct?  */
 167       /* An escaped newline character is substituted with a single <SP>.  */
 168       --n;
 169       lr->buf[n - 1] = ' ';
 170 #else
 171       n -= 2;
 172 #endif
 173     }
 174
 175   lr->buf[n] = '\0';
 176   lr->bufact = n;
 177   lr->idx = 0;
 178
 179   return 0;
 180 }
 181
 182
 183 /* Defined in error.c.  */
 184 /* This variable is incremented each time `error' is called.  */
 185 extern unsigned int error_message_count;
 186
 187 /* The calling program should define program_name and set it to the
 188    name of the executing program.  */
 189 extern char *program_name;
 190
 191
 192 struct token *
 193 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 194           struct localedef_t *locale, const struct repertoire_t *repertoire,
 195           int verbose)
 196 {
 197   int ch;
 198
 199   while (1)
 200     {
 201       do
 202         {
 203           ch = lr_getc (lr);
 204
 205           if (ch == EOF)
 206             {
 207               lr->token.tok = tok_eof;
 208               return &lr->token;
 209             };
 210
 211           if (ch == '\n')
 212             {
 213               lr->token.tok = tok_eol;
 214               return &lr->token;
 215             }
 216         }
 217       while (isspace (ch));
 218
 219       if (ch == EOF)
 220         {
 221           lr->token.tok = tok_eof;
 222           return &lr->token;
 223         };
 224
 225       if (ch != lr->comment_char)
 226         break;
 227
 228       /* Is there an newline at the end of the buffer?  */
 229       if (lr->buf[lr->bufact - 1] != '\n')
 230         {
 231           /* No.  Some people want this to mean that only the line in
 232              the file not the logical, concatenated line is ignored.
 233              Let's try this.  */
 234           lr->idx = lr->bufact;
 235           continue;
 236         }
 237
 238       /* Ignore rest of line.  */
 239       lr_ignore_rest (lr, 0);
 240       lr->token.tok = tok_eol;
 241       return &lr->token;
 242     }
 243
 244   /* Match escape sequences.  */
 245   if (ch == lr->escape_char)
 246     return get_toplvl_escape (lr);
 247
 248   /* Match ellipsis.  */
 249   if (ch == '.')
 250     {
 251       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 252         {
 253           int cnt;
 254           for (cnt = 0; cnt < 10; ++cnt)
 255             lr_getc (lr);
 256           lr->token.tok = tok_ellipsis4_2;
 257           return &lr->token;
 258         }
 259       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 260         {
 261           lr_getc (lr);
 262           lr_getc (lr);
 263           lr_getc (lr);
 264           lr->token.tok = tok_ellipsis4;
 265           return &lr->token;
 266         }
 267       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 268         {
 269           lr_getc (lr);
 270           lr_getc (lr);
 271           lr->token.tok = tok_ellipsis3;
 272           return &lr->token;
 273         }
 274       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 275         {
 276           int cnt;
 277           for (cnt = 0; cnt < 6; ++cnt)
 278             lr_getc (lr);
 279           lr->token.tok = tok_ellipsis2_2;
 280           return &lr->token;
 281         }
 282       if (lr->buf[lr->idx] == '.')
 283         {
 284           lr_getc (lr);
 285           lr->token.tok = tok_ellipsis2;
 286           return &lr->token;
 287         }
 288     }
 289
 290   switch (ch)
 291     {
 292     case '<':
 293       return get_symname (lr);
 294
 295     case '0' ... '9':
 296       lr->token.tok = tok_number;
 297       lr->token.val.num = ch - '0';
 298
 299       while (isdigit (ch = lr_getc (lr)))
 300         {
 301           lr->token.val.num *= 10;
 302           lr->token.val.num += ch - '0';
 303         }
 304       if (isalpha (ch))
 305         lr_error (lr, _("garbage at end of number"));
 306       lr_ungetn (lr, 1);
 307
 308       return &lr->token;
 309
 310     case ';':
 311       lr->token.tok = tok_semicolon;
 312       return &lr->token;
 313
 314     case ',':
 315       lr->token.tok = tok_comma;
 316       return &lr->token;
 317
 318     case '(':
 319       lr->token.tok = tok_open_brace;
 320       return &lr->token;
 321
 322     case ')':
 323       lr->token.tok = tok_close_brace;
 324       return &lr->token;
 325
 326     case '"':
 327       return get_string (lr, charmap, locale, repertoire, verbose);
 328
 329     case '-':
 330       ch = lr_getc (lr);
 331       if (ch == '1')
 332         {
 333           lr->token.tok = tok_minus1;
 334           return &lr->token;
 335         }
 336       lr_ungetn (lr, 2);
 337       break;
 338     }
 339
 340   return get_ident (lr);
 341 }
 342
 343
 344 static struct token *
 345 get_toplvl_escape (struct linereader *lr)
 346 {
 347   /* This is supposed to be a numeric value.  We return the
 348      numerical value and the number of bytes.  */
 349   size_t start_idx = lr->idx - 1;
 350   char *bytes = lr->token.val.charcode.bytes;
 351   int nbytes = 0;
 352   int ch;
 353
 354   do
 355     {
 356       unsigned int byte = 0;
 357       unsigned int base = 8;
 358
 359       ch = lr_getc (lr);
 360
 361       if (ch == 'd')
 362         {
 363           base = 10;
 364           ch = lr_getc (lr);
 365         }
 366       else if (ch == 'x')
 367         {
 368           base = 16;
 369           ch = lr_getc (lr);
 370         }
 371
 372       if ((base == 16 && !isxdigit (ch))
 373           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 374         {
 375         esc_error:
 376           lr->token.val.str.startmb = &lr->buf[start_idx];
 377
 378           while (ch != EOF && !isspace (ch))
 379             ch = lr_getc (lr);
 380           lr->token.val.str.lenmb = lr->idx - start_idx;
 381
 382           lr->token.tok = tok_error;
 383           return &lr->token;
 384         }
 385
 386       if (isdigit (ch))
 387         byte = ch - '0';
 388       else
 389         byte = tolower (ch) - 'a' + 10;
 390
 391       ch = lr_getc (lr);
 392       if ((base == 16 && !isxdigit (ch))
 393           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 394         goto esc_error;
 395
 396       byte *= base;
 397       if (isdigit (ch))
 398         byte += ch - '0';
 399       else
 400         byte += tolower (ch) - 'a' + 10;
 401
 402       ch = lr_getc (lr);
 403       if (base != 16 && isdigit (ch))
 404         {
 405           byte *= base;
 406           byte += ch - '0';
 407
 408           ch = lr_getc (lr);
 409         }
 410
 411       bytes[nbytes++] = byte;
 412     }
 413   while (ch == lr->escape_char
 414          && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
 415
 416   if (!isspace (ch))
 417     lr_error (lr, _("garbage at end of character code specification"));
 418
 419   lr_ungetn (lr, 1);
 420
 421   lr->token.tok = tok_charcode;
 422   lr->token.val.charcode.nbytes = nbytes;
 423
 424   return &lr->token;
 425 }
 426
 427
 428 #define ADDC(ch) \
 429   do                                                                          \
 430     {                                                                         \
 431       if (bufact == bufmax)                                                   \
 432         {                                                                     \
 433           bufmax *= 2;                                                        \
 434           buf = xrealloc (buf, bufmax);                                       \
 435         }                                                                     \
 436       buf[bufact++] = (ch);                                                   \
 437     }                                                                         \
 438   while (0)
 439
 440
 441 #define ADDS(s, l) \
 442   do                                                                          \
 443     {                                                                         \
 444       size_t _l = (l);                                                        \
 445       if (bufact + _l > bufmax)                                               \
 446         {                                                                     \
 447           if (bufact < _l)                                                    \
 448             bufact = _l;                                                      \
 449           bufmax *= 2;                                                        \
 450           buf = xrealloc (buf, bufmax);                                       \
 451         }                                                                     \
 452       memcpy (&buf[bufact], s, _l);                                           \
 453       bufact += _l;                                                           \
 454     }                                                                         \
 455   while (0)
 456
 457
 458 #define ADDWC(ch) \
 459   do                                                                          \
 460     {                                                                         \
 461       if (buf2act == buf2max)                                                 \
 462         {                                                                     \
 463           buf2max *= 2;                                                       \
 464           buf2 = xrealloc (buf2, buf2max * 4);                                \
 465         }                                                                     \
 466       buf2[buf2act++] = (ch);                                                 \
 467     }                                                                         \
 468   while (0)
 469
 470
 471 static struct token *
 472 get_symname (struct linereader *lr)
 473 {
 474   /* Symbol in brackets.  We must distinguish three kinds:
 475      1. reserved words
 476      2. ISO 10646 position values
 477      3. all other.  */
 478   char *buf;
 479   size_t bufact = 0;
 480   size_t bufmax = 56;
 481   const struct keyword_t *kw;
 482   int ch;
 483
 484   buf = (char *) xmalloc (bufmax);
 485
 486   do
 487     {
 488       ch = lr_getc (lr);
 489       if (ch == lr->escape_char)
 490         {
 491           int c2 = lr_getc (lr);
 492           ADDC (c2);
 493
 494           if (c2 == '\n')
 495             ch = '\n';
 496         }
 497       else
 498         ADDC (ch);
 499     }
 500   while (ch != '>' && ch != '\n');
 501
 502   if (ch == '\n')
 503     lr_error (lr, _("unterminated symbolic name"));
 504
 505   /* Test for ISO 10646 position value.  */
 506   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 507     {
 508       char *cp = buf + 1;
 509       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 510         ++cp;
 511
 512       if (cp == &buf[bufact - 1])
 513         {
 514           /* Yes, it is.  */
 515           lr->token.tok = tok_ucs4;
 516           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 517
 518           return &lr->token;
 519         }
 520     }
 521
 522   /* It is a symbolic name.  Test for reserved words.  */
 523   kw = lr->hash_fct (buf, bufact - 1);
 524
 525   if (kw != NULL && kw->symname_or_ident == 1)
 526     {
 527       lr->token.tok = kw->token;
 528       free (buf);
 529     }
 530   else
 531     {
 532       lr->token.tok = tok_bsymbol;
 533
 534       buf[bufact] = '\0';
 535       buf = xrealloc (buf, bufact + 1);
 536
 537       lr->token.val.str.startmb = buf;
 538       lr->token.val.str.lenmb = bufact - 1;
 539     }
 540
 541   return &lr->token;
 542 }
 543
 544
 545 static struct token *
 546 get_ident (struct linereader *lr)
 547 {
 548   char *buf;
 549   size_t bufact;
 550   size_t bufmax = 56;
 551   const struct keyword_t *kw;
 552   int ch;
 553
 554   buf = xmalloc (bufmax);
 555   bufact = 0;
 556
 557   ADDC (lr->buf[lr->idx - 1]);
 558
 559   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 560          && ch != '<' && ch != ',' && ch != EOF)
 561     {
 562       if (ch == lr->escape_char)
 563         {
 564           ch = lr_getc (lr);
 565           if (ch == '\n' || ch == EOF)
 566             {
 567               lr_error (lr, _("invalid escape sequence"));
 568               break;
 569             }
 570         }
 571       ADDC (ch);
 572     }
 573
 574   lr_ungetc (lr, ch);
 575
 576   kw = lr->hash_fct (buf, bufact);
 577
 578   if (kw != NULL && kw->symname_or_ident == 0)
 579     {
 580       lr->token.tok = kw->token;
 581       free (buf);
 582     }
 583   else
 584     {
 585       lr->token.tok = tok_ident;
 586
 587       buf[bufact] = '\0';
 588       buf = xrealloc (buf, bufact + 1);
 589
 590       lr->token.val.str.startmb = buf;
 591       lr->token.val.str.lenmb = bufact;
 592     }
 593
 594   return &lr->token;
 595 }
 596
 597
 598 static struct token *
 599 get_string (struct linereader *lr, const struct charmap_t *charmap,
 600             struct localedef_t *locale, const struct repertoire_t *repertoire,
 601             int verbose)
 602 {
 603   int return_widestr = lr->return_widestr;
 604   char *buf;
 605   wchar_t *buf2 = NULL;
 606   size_t bufact;
 607   size_t bufmax = 56;
 608
 609   /* We must return two different strings.  */
 610   buf = xmalloc (bufmax);
 611   bufact = 0;
 612
 613   /* We know it'll be a string.  */
 614   lr->token.tok = tok_string;
 615
 616   /* If we need not translate the strings (i.e., expand <...> parts)
 617      we can run a simple loop.  */
 618   if (!lr->translate_strings)
 619     {
 620       int ch;
 621
 622       buf2 = NULL;
 623       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 624         ADDC (ch);
 625
 626       /* Catch errors with trailing escape character.  */
 627       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 628           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 629         {
 630           lr_error (lr, _("illegal escape sequence at end of string"));
 631           --bufact;
 632         }
 633       else if (ch == '\n' || ch == EOF)
 634         lr_error (lr, _("unterminated string"));
 635
 636       ADDC ('\0');
 637     }
 638   else
 639     {
 640       int illegal_string = 0;
 641       size_t buf2act = 0;
 642       size_t buf2max = 56 * sizeof (uint32_t);
 643       int ch;
 644       int warned = 0;
 645
 646       /* We have to provide the wide character result as well.  */
 647       if (return_widestr)
 648         buf2 = xmalloc (buf2max);
 649
 650       /* Read until the end of the string (or end of the line or file).  */
 651       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 652         {
 653           size_t startidx;
 654           uint32_t wch;
 655           struct charseq *seq;
 656
 657           if (ch != '<')
 658             {
 659               /* The standards leave it up to the implementation to decide
 660                  what to do with character which stand for themself.  We
 661                  could jump through hoops to find out the value relative to
 662                  the charmap and the repertoire map, but instead we leave
 663                  it up to the locale definition author to write a better
 664                  definition.  We assume here that every character which
 665                  stands for itself is encoded using ISO 8859-1.  Using the
 666                  escape character is allowed.  */
 667               if (ch == lr->escape_char)
 668                 {
 669                   ch = lr_getc (lr);
 670                   if (ch == '\n' || ch == EOF)
 671                     break;
 672                 }
 673
 674               if (verbose && !warned)
 675                 {
 676                   lr_error (lr, _("\
 677 non-symbolic character value should not be used"));
 678                   warned = 1;
 679                 }
 680
 681               ADDC (ch);
 682               if (return_widestr)
 683                 ADDWC ((uint32_t) ch);
 684
 685               continue;
 686             }
 687
 688           /* Now we have to search for the end of the symbolic name, i.e.,
 689              the closing '>'.  */
 690           startidx = bufact;
 691           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 692             {
 693               if (ch == lr->escape_char)
 694                 {
 695                   ch = lr_getc (lr);
 696                   if (ch == '\n' || ch == EOF)
 697                     break;
 698                 }
 699               ADDC (ch);
 700             }
 701           if (ch == '\n' || ch == EOF)
 702             /* Not a correct string.  */
 703             break;
 704           if (bufact == startidx)
 705             {
 706               /* <> is no correct name.  Ignore it and also signal an
 707                  error.  */
 708               illegal_string = 1;
 709               continue;
 710             }
 711
 712           /* It might be a Uxxxx symbol.  */
 713           if (buf[startidx] == 'U'
 714               && (bufact - startidx == 5 || bufact - startidx == 9))
 715             {
 716               char *cp = buf + startidx + 1;
 717               while (cp < &buf[bufact] && isxdigit (*cp))
 718                 ++cp;
 719
 720               if (cp == &buf[bufact])
 721                 {
 722                   char utmp[10];
 723
 724                   /* Yes, it is.  */
 725                   ADDC ('\0');
 726                   wch = strtoul (buf + startidx + 1, NULL, 16);
 727
 728                   /* Now forget about the name we just added.  */
 729                   bufact = startidx;
 730
 731                   if (return_widestr)
 732                     ADDWC (wch);
 733
 734                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 735                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 736                   seq = charmap_find_value (charmap, utmp, 9);
 737
 738                   if (seq == NULL)
 739                     {
 740                      /* No, this isn't the case.  Now determine from
 741                         the repertoire the name of the character and
 742                         find it in the charmap.  */
 743                       if (repertoire != NULL)
 744                         {
 745                           const char *symbol;
 746
 747                           symbol = repertoire_find_symbol (repertoire, wch);
 748
 749                           if (symbol != NULL)
 750                             seq = charmap_find_value (charmap, symbol,
 751                                                       strlen (symbol));
 752                         }
 753
 754                       if (seq == NULL)
 755                         {
 756 #ifndef NO_TRANSLITERATION
 757                           /* Transliterate if possible.  */
 758                           if (locale != NULL)
 759                             {
 760                               uint32_t *translit;
 761
 762                               if ((locale->avail & CTYPE_LOCALE) == 0)
 763                                 {
 764                                   /* Load the CTYPE data now.  */
 765                                   int old_needed = locale->needed;
 766
 767                                   locale->needed = 0;
 768                                   locale = load_locale (LC_CTYPE,
 769                                                         locale->name,
 770                                                         locale->repertoire_name,
 771                                                         charmap, locale);
 772                                   locale->needed = old_needed;
 773                                 }
 774
 775                               if ((locale->avail & CTYPE_LOCALE) != 0
 776                                   && ((translit = find_translit (locale,
 777                                                                  charmap, wch))
 778                                       != NULL))
 779                                 /* The CTYPE data contains a matching
 780                                    transliteration.  */
 781                                 {
 782                                   int i;
 783
 784                                   for (i = 0; translit[i] != 0; ++i)
 785                                     {
 786                                       char utmp[10];
 787
 788                                       snprintf (utmp, sizeof (utmp), "U%08X",
 789                                                 translit[i]);
 790                                       seq = charmap_find_value (charmap, utmp,
 791                                                                 9);
 792                                       assert (seq != NULL);
 793                                       ADDS (seq->bytes, seq->nbytes);
 794                                     }
 795
 796                                   continue;
 797                                 }
 798                             }
 799 #endif  /* NO_TRANSLITERATION */
 800
 801                           /* Not a known name.  */
 802                           illegal_string = 1;
 803                         }
 804                     }
 805
 806                   if (seq != NULL)
 807                     ADDS (seq->bytes, seq->nbytes);
 808
 809                   continue;
 810                 }
 811             }
 812
 813           /* We now have the symbolic name in buf[startidx] to
 814              buf[bufact-1].  Now find out the value for this character
 815              in the charmap as well as in the repertoire map (in this
 816              order).  */
 817           seq = charmap_find_value (charmap, &buf[startidx],
 818                                     bufact - startidx);
 819
 820           if (seq == NULL)
 821             {
 822               /* This name is not in the charmap.  */
 823               lr_error (lr, _("symbol `%.*s' not in charmap"),
 824                         (int) (bufact - startidx), &buf[startidx]);
 825               illegal_string = 1;
 826             }
 827
 828           if (return_widestr)
 829             {
 830               /* Now the same for the multibyte representation.  */
 831               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 832                 wch = seq->ucs4;
 833               else
 834                 {
 835                   wch = repertoire_find_value (repertoire, &buf[startidx],
 836                                                bufact - startidx);
 837                   if (seq != NULL)
 838                     seq->ucs4 = wch;
 839                 }
 840
 841               if (wch == ILLEGAL_CHAR_VALUE)
 842                 {
 843                   /* This name is not in the repertoire map.  */
 844                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 845                             (int) (bufact - startidx), &buf[startidx]);
 846                   illegal_string = 1;
 847                 }
 848               else
 849                 ADDWC (wch);
 850             }
 851
 852           /* Now forget about the name we just added.  */
 853           bufact = startidx;
 854
 855           /* And copy the bytes.  */
 856           if (seq != NULL)
 857             ADDS (seq->bytes, seq->nbytes);
 858         }
 859
 860       if (ch == '\n' || ch == EOF)
 861         {
 862           lr_error (lr, _("unterminated string"));
 863           illegal_string = 1;
 864         }
 865
 866       if (illegal_string)
 867         {
 868           free (buf);
 869           if (buf2 != NULL)
 870             free (buf2);
 871           lr->token.val.str.startmb = NULL;
 872           lr->token.val.str.lenmb = 0;
 873           lr->token.val.str.startwc = NULL;
 874           lr->token.val.str.lenwc = 0;
 875
 876           return &lr->token;
 877         }
 878
 879       ADDC ('\0');
 880
 881       if (return_widestr)
 882         {
 883           ADDWC (0);
 884           lr->token.val.str.startwc = xrealloc (buf2,
 885                                                 buf2act * sizeof (uint32_t));
 886           lr->token.val.str.lenwc = buf2act;
 887         }
 888     }
 889
 890   lr->token.val.str.startmb = xrealloc (buf, bufact);
 891   lr->token.val.str.lenmb = bufact;
 892
 893   return &lr->token;
 894 }