locale/programs/linereader.c

   1 /* Copyright (C) 1996-2001, 2002 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <errno.h>
  27 #include <libintl.h>
  28 #include <stdarg.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #include "localedef.h"
  33 #include "charmap.h"
  34 #include "error.h"
  35 #include "linereader.h"
  36 #include "locfile.h"
  37
/* Prototypes for local functions.  Each of these helpers is called from
   lr_token once the first character of the token has been consumed, and
   each returns a pointer to the token embedded in LR.  */
static struct token *get_toplvl_escape (struct linereader *lr);
static struct token *get_symname (struct linereader *lr);
static struct token *get_ident (struct linereader *lr);
static struct token *get_string (struct linereader *lr,
                                 const struct charmap_t *charmap,
                                 struct localedef_t *locale,
                                 const struct repertoire_t *repertoire,
                                 int verbose);
  47
  48
  49 struct linereader *
  50 lr_open (const char *fname, kw_hash_fct_t hf)
  51 {
  52   FILE *fp;
  53
  54   if (fname == NULL || strcmp (fname, "-") == 0
  55       || strcmp (fname, "/dev/stdin") == 0)
  56     return lr_create (stdin, "<stdin>", hf);
  57   else
  58     {
  59       fp = fopen (fname, "r");
  60       if (fp == NULL)
  61         return NULL;
  62       return lr_create (fp, fname, hf);
  63     }
  64 }
  65
  66 struct linereader *
  67 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  68 {
  69   struct linereader *result;
  70   int n;
  71
  72   result = (struct linereader *) xmalloc (sizeof (*result));
  73
  74   result->fp = fp;
  75   result->fname = xstrdup (fname);
  76   result->buf = NULL;
  77   result->bufsize = 0;
  78   result->lineno = 1;
  79   result->idx = 0;
  80   result->comment_char = '#';
  81   result->escape_char = '\\';
  82   result->translate_strings = 1;
  83
  84   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  85   if (n < 0)
  86     {
  87       int save = errno;
  88       fclose (result->fp);
  89       free ((char *) result->fname);
  90       free (result);
  91       errno = save;
  92       return NULL;
  93     }
  94
  95   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  96     n -= 2;
  97
  98   result->buf[n] = '\0';
  99   result->bufact = n;
 100   result->hash_fct = hf;
 101
 102   return result;
 103 }
 104
 105
 106 int
 107 lr_eof (struct linereader *lr)
 108 {
 109   return lr->bufact = 0;
 110 }
 111
 112
 113 void
 114 lr_close (struct linereader *lr)
 115 {
 116   fclose (lr->fp);
 117   free (lr->buf);
 118   free (lr);
 119 }
 120
 121
 122 int
 123 lr_next (struct linereader *lr)
 124 {
 125   int n;
 126
 127   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 128   if (n < 0)
 129     return -1;
 130
 131   ++lr->lineno;
 132
 133   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 134     {
 135 #if 0
 136       /* XXX Is this correct?  */
 137       /* An escaped newline character is substituted with a single <SP>.  */
 138       --n;
 139       lr->buf[n - 1] = ' ';
 140 #else
 141       n -= 2;
 142 #endif
 143     }
 144
 145   lr->buf[n] = '\0';
 146   lr->bufact = n;
 147   lr->idx = 0;
 148
 149   return 0;
 150 }
 151
 152
/* Defined in error.c.  */
/* This variable is incremented each time `error' is called.
   NOTE(review): nothing in the visible part of this file references
   it -- confirm it is still needed here.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;
 160
 161
 162 struct token *
 163 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 164           struct localedef_t *locale, const struct repertoire_t *repertoire,
 165           int verbose)
 166 {
 167   int ch;
 168
 169   while (1)
 170     {
 171       do
 172         {
 173           ch = lr_getc (lr);
 174
 175           if (ch == EOF)
 176             {
 177               lr->token.tok = tok_eof;
 178               return &lr->token;
 179             };
 180
 181           if (ch == '\n')
 182             {
 183               lr->token.tok = tok_eol;
 184               return &lr->token;
 185             }
 186         }
 187       while (isspace (ch));
 188
 189       if (ch == EOF)
 190         {
 191           lr->token.tok = tok_eof;
 192           return &lr->token;
 193         };
 194
 195       if (ch != lr->comment_char)
 196         break;
 197
 198       /* Is there an newline at the end of the buffer?  */
 199       if (lr->buf[lr->bufact - 1] != '\n')
 200         {
 201           /* No.  Some people want this to mean that only the line in
 202              the file not the logical, concatenated line is ignored.
 203              Let's try this.  */
 204           lr->idx = lr->bufact;
 205           continue;
 206         }
 207
 208       /* Ignore rest of line.  */
 209       lr_ignore_rest (lr, 0);
 210       lr->token.tok = tok_eol;
 211       return &lr->token;
 212     }
 213
 214   /* Match escape sequences.  */
 215   if (ch == lr->escape_char)
 216     return get_toplvl_escape (lr);
 217
 218   /* Match ellipsis.  */
 219   if (ch == '.')
 220     {
 221       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 222         {
 223           int cnt;
 224           for (cnt = 0; cnt < 10; ++cnt)
 225             lr_getc (lr);
 226           lr->token.tok = tok_ellipsis4_2;
 227           return &lr->token;
 228         }
 229       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 230         {
 231           lr_getc (lr);
 232           lr_getc (lr);
 233           lr_getc (lr);
 234           lr->token.tok = tok_ellipsis4;
 235           return &lr->token;
 236         }
 237       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 238         {
 239           lr_getc (lr);
 240           lr_getc (lr);
 241           lr->token.tok = tok_ellipsis3;
 242           return &lr->token;
 243         }
 244       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 245         {
 246           int cnt;
 247           for (cnt = 0; cnt < 6; ++cnt)
 248             lr_getc (lr);
 249           lr->token.tok = tok_ellipsis2_2;
 250           return &lr->token;
 251         }
 252       if (lr->buf[lr->idx] == '.')
 253         {
 254           lr_getc (lr);
 255           lr->token.tok = tok_ellipsis2;
 256           return &lr->token;
 257         }
 258     }
 259
 260   switch (ch)
 261     {
 262     case '<':
 263       return get_symname (lr);
 264
 265     case '0' ... '9':
 266       lr->token.tok = tok_number;
 267       lr->token.val.num = ch - '0';
 268
 269       while (isdigit (ch = lr_getc (lr)))
 270         {
 271           lr->token.val.num *= 10;
 272           lr->token.val.num += ch - '0';
 273         }
 274       if (isalpha (ch))
 275         lr_error (lr, _("garbage at end of number"));
 276       lr_ungetn (lr, 1);
 277
 278       return &lr->token;
 279
 280     case ';':
 281       lr->token.tok = tok_semicolon;
 282       return &lr->token;
 283
 284     case ',':
 285       lr->token.tok = tok_comma;
 286       return &lr->token;
 287
 288     case '(':
 289       lr->token.tok = tok_open_brace;
 290       return &lr->token;
 291
 292     case ')':
 293       lr->token.tok = tok_close_brace;
 294       return &lr->token;
 295
 296     case '"':
 297       return get_string (lr, charmap, locale, repertoire, verbose);
 298
 299     case '-':
 300       ch = lr_getc (lr);
 301       if (ch == '1')
 302         {
 303           lr->token.tok = tok_minus1;
 304           return &lr->token;
 305         }
 306       lr_ungetn (lr, 2);
 307       break;
 308     }
 309
 310   return get_ident (lr);
 311 }
 312
 313
 314 static struct token *
 315 get_toplvl_escape (struct linereader *lr)
 316 {
 317   /* This is supposed to be a numeric value.  We return the
 318      numerical value and the number of bytes.  */
 319   size_t start_idx = lr->idx - 1;
 320   char *bytes = lr->token.val.charcode.bytes;
 321   int nbytes = 0;
 322   int ch;
 323
 324   do
 325     {
 326       unsigned int byte = 0;
 327       unsigned int base = 8;
 328
 329       ch = lr_getc (lr);
 330
 331       if (ch == 'd')
 332         {
 333           base = 10;
 334           ch = lr_getc (lr);
 335         }
 336       else if (ch == 'x')
 337         {
 338           base = 16;
 339           ch = lr_getc (lr);
 340         }
 341
 342       if ((base == 16 && !isxdigit (ch))
 343           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 344         {
 345         esc_error:
 346           lr->token.val.str.startmb = &lr->buf[start_idx];
 347
 348           while (ch != EOF && !isspace (ch))
 349             ch = lr_getc (lr);
 350           lr->token.val.str.lenmb = lr->idx - start_idx;
 351
 352           lr->token.tok = tok_error;
 353           return &lr->token;
 354         }
 355
 356       if (isdigit (ch))
 357         byte = ch - '0';
 358       else
 359         byte = tolower (ch) - 'a' + 10;
 360
 361       ch = lr_getc (lr);
 362       if ((base == 16 && !isxdigit (ch))
 363           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 364         goto esc_error;
 365
 366       byte *= base;
 367       if (isdigit (ch))
 368         byte += ch - '0';
 369       else
 370         byte += tolower (ch) - 'a' + 10;
 371
 372       ch = lr_getc (lr);
 373       if (base != 16 && isdigit (ch))
 374         {
 375           byte *= base;
 376           byte += ch - '0';
 377
 378           ch = lr_getc (lr);
 379         }
 380
 381       bytes[nbytes++] = byte;
 382     }
 383   while (ch == lr->escape_char
 384          && nbytes < sizeof (lr->token.val.charcode.bytes));
 385
 386   if (!isspace (ch))
 387     lr_error (lr, _("garbage at end of character code specification"));
 388
 389   lr_ungetn (lr, 1);
 390
 391   lr->token.tok = tok_charcode;
 392   lr->token.val.charcode.nbytes = nbytes;
 393
 394   return &lr->token;
 395 }
 396
 397
 398 #define ADDC(ch) \
 399   do                                                                          \
 400     {                                                                         \
 401       if (bufact == bufmax)                                                   \
 402         {                                                                     \
 403           bufmax *= 2;                                                        \
 404           buf = xrealloc (buf, bufmax);                                       \
 405         }                                                                     \
 406       buf[bufact++] = (ch);                                                   \
 407     }                                                                         \
 408   while (0)
 409
 410
 411 #define ADDS(s, l) \
 412   do                                                                          \
 413     {                                                                         \
 414       size_t _l = (l);                                                        \
 415       if (bufact + _l > bufmax)                                               \
 416         {                                                                     \
 417           if (bufact < _l)                                                    \
 418             bufact = _l;                                                      \
 419           bufmax *= 2;                                                        \
 420           buf = xrealloc (buf, bufmax);                                       \
 421         }                                                                     \
 422       memcpy (&buf[bufact], s, _l);                                           \
 423       bufact += _l;                                                           \
 424     }                                                                         \
 425   while (0)
 426
 427
 428 #define ADDWC(ch) \
 429   do                                                                          \
 430     {                                                                         \
 431       if (buf2act == buf2max)                                                 \
 432         {                                                                     \
 433           buf2max *= 2;                                                       \
 434           buf2 = xrealloc (buf2, buf2max * 4);                                \
 435         }                                                                     \
 436       buf2[buf2act++] = (ch);                                                 \
 437     }                                                                         \
 438   while (0)
 439
 440
 441 static struct token *
 442 get_symname (struct linereader *lr)
 443 {
 444   /* Symbol in brackets.  We must distinguish three kinds:
 445      1. reserved words
 446      2. ISO 10646 position values
 447      3. all other.  */
 448   char *buf;
 449   size_t bufact = 0;
 450   size_t bufmax = 56;
 451   const struct keyword_t *kw;
 452   int ch;
 453
 454   buf = (char *) xmalloc (bufmax);
 455
 456   do
 457     {
 458       ch = lr_getc (lr);
 459       if (ch == lr->escape_char)
 460         {
 461           int c2 = lr_getc (lr);
 462           ADDC (c2);
 463
 464           if (c2 == '\n')
 465             ch = '\n';
 466         }
 467       else
 468         ADDC (ch);
 469     }
 470   while (ch != '>' && ch != '\n');
 471
 472   if (ch == '\n')
 473     lr_error (lr, _("unterminated symbolic name"));
 474
 475   /* Test for ISO 10646 position value.  */
 476   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 477     {
 478       char *cp = buf + 1;
 479       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 480         ++cp;
 481
 482       if (cp == &buf[bufact - 1])
 483         {
 484           /* Yes, it is.  */
 485           lr->token.tok = tok_ucs4;
 486           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 487
 488           return &lr->token;
 489         }
 490     }
 491
 492   /* It is a symbolic name.  Test for reserved words.  */
 493   kw = lr->hash_fct (buf, bufact - 1);
 494
 495   if (kw != NULL && kw->symname_or_ident == 1)
 496     {
 497       lr->token.tok = kw->token;
 498       free (buf);
 499     }
 500   else
 501     {
 502       lr->token.tok = tok_bsymbol;
 503
 504       buf[bufact] = '\0';
 505       buf = xrealloc (buf, bufact + 1);
 506
 507       lr->token.val.str.startmb = buf;
 508       lr->token.val.str.lenmb = bufact - 1;
 509     }
 510
 511   return &lr->token;
 512 }
 513
 514
 515 static struct token *
 516 get_ident (struct linereader *lr)
 517 {
 518   char *buf;
 519   size_t bufact;
 520   size_t bufmax = 56;
 521   const struct keyword_t *kw;
 522   int ch;
 523
 524   buf = xmalloc (bufmax);
 525   bufact = 0;
 526
 527   ADDC (lr->buf[lr->idx - 1]);
 528
 529   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 530          && ch != '<' && ch != ',' && ch != EOF)
 531     {
 532       if (ch == lr->escape_char)
 533         {
 534           ch = lr_getc (lr);
 535           if (ch == '\n' || ch == EOF)
 536             {
 537               lr_error (lr, _("invalid escape sequence"));
 538               break;
 539             }
 540         }
 541       ADDC (ch);
 542     }
 543
 544   lr_ungetc (lr, ch);
 545
 546   kw = lr->hash_fct (buf, bufact);
 547
 548   if (kw != NULL && kw->symname_or_ident == 0)
 549     {
 550       lr->token.tok = kw->token;
 551       free (buf);
 552     }
 553   else
 554     {
 555       lr->token.tok = tok_ident;
 556
 557       buf[bufact] = '\0';
 558       buf = xrealloc (buf, bufact + 1);
 559
 560       lr->token.val.str.startmb = buf;
 561       lr->token.val.str.lenmb = bufact;
 562     }
 563
 564   return &lr->token;
 565 }
 566
 567
 568 static struct token *
 569 get_string (struct linereader *lr, const struct charmap_t *charmap,
 570             struct localedef_t *locale, const struct repertoire_t *repertoire,
 571             int verbose)
 572 {
 573   int return_widestr = lr->return_widestr;
 574   char *buf;
 575   wchar_t *buf2 = NULL;
 576   size_t bufact;
 577   size_t bufmax = 56;
 578
 579   /* We must return two different strings.  */
 580   buf = xmalloc (bufmax);
 581   bufact = 0;
 582
 583   /* We know it'll be a string.  */
 584   lr->token.tok = tok_string;
 585
 586   /* If we need not translate the strings (i.e., expand <...> parts)
 587      we can run a simple loop.  */
 588   if (!lr->translate_strings)
 589     {
 590       int ch;
 591
 592       buf2 = NULL;
 593       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 594         ADDC (ch);
 595
 596       /* Catch errors with trailing escape character.  */
 597       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 598           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 599         {
 600           lr_error (lr, _("illegal escape sequence at end of string"));
 601           --bufact;
 602         }
 603       else if (ch == '\n' || ch == EOF)
 604         lr_error (lr, _("unterminated string"));
 605
 606       ADDC ('\0');
 607     }
 608   else
 609     {
 610       int illegal_string = 0;
 611       size_t buf2act = 0;
 612       size_t buf2max = 56 * sizeof (uint32_t);
 613       int ch;
 614       int warned = 0;
 615
 616       /* We have to provide the wide character result as well.  */
 617       if (return_widestr)
 618         buf2 = xmalloc (buf2max);
 619
 620       /* Read until the end of the string (or end of the line or file).  */
 621       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 622         {
 623           size_t startidx;
 624           uint32_t wch;
 625           struct charseq *seq;
 626
 627           if (ch != '<')
 628             {
 629               /* The standards leave it up to the implementation to decide
 630                  what to do with character which stand for themself.  We
 631                  could jump through hoops to find out the value relative to
 632                  the charmap and the repertoire map, but instead we leave
 633                  it up to the locale definition author to write a better
 634                  definition.  We assume here that every character which
 635                  stands for itself is encoded using ISO 8859-1.  Using the
 636                  escape character is allowed.  */
 637               if (ch == lr->escape_char)
 638                 {
 639                   ch = lr_getc (lr);
 640                   if (ch == '\n' || ch == EOF)
 641                     break;
 642                 }
 643
 644               if (verbose && !warned)
 645                 {
 646                   lr_error (lr, _("\
 647 non-symbolic character value should not be used"));
 648                   warned = 1;
 649                 }
 650
 651               ADDC (ch);
 652               if (return_widestr)
 653                 ADDWC ((uint32_t) ch);
 654
 655               continue;
 656             }
 657
 658           /* Now we have to search for the end of the symbolic name, i.e.,
 659              the closing '>'.  */
 660           startidx = bufact;
 661           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 662             {
 663               if (ch == lr->escape_char)
 664                 {
 665                   ch = lr_getc (lr);
 666                   if (ch == '\n' || ch == EOF)
 667                     break;
 668                 }
 669               ADDC (ch);
 670             }
 671           if (ch == '\n' || ch == EOF)
 672             /* Not a correct string.  */
 673             break;
 674           if (bufact == startidx)
 675             {
 676               /* <> is no correct name.  Ignore it and also signal an
 677                  error.  */
 678               illegal_string = 1;
 679               continue;
 680             }
 681
 682           /* It might be a Uxxxx symbol.  */
 683           if (buf[startidx] == 'U'
 684               && (bufact - startidx == 5 || bufact - startidx == 9))
 685             {
 686               char *cp = buf + startidx + 1;
 687               while (cp < &buf[bufact] && isxdigit (*cp))
 688                 ++cp;
 689
 690               if (cp == &buf[bufact])
 691                 {
 692                   char utmp[10];
 693
 694                   /* Yes, it is.  */
 695                   ADDC ('\0');
 696                   wch = strtoul (buf + startidx + 1, NULL, 16);
 697
 698                   /* Now forget about the name we just added.  */
 699                   bufact = startidx;
 700
 701                   if (return_widestr)
 702                     ADDWC (wch);
 703
 704                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 705                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 706                   seq = charmap_find_value (charmap, utmp, 9);
 707
 708                   if (seq == NULL)
 709                     {
 710                      /* No, this isn't the case.  Now determine from
 711                         the repertoire the name of the character and
 712                         find it in the charmap.  */
 713                       if (repertoire != NULL)
 714                         {
 715                           const char *symbol;
 716
 717                           symbol = repertoire_find_symbol (repertoire, wch);
 718
 719                           if (symbol != NULL)
 720                             seq = charmap_find_value (charmap, symbol,
 721                                                       strlen (symbol));
 722                         }
 723
 724                       if (seq == NULL)
 725                         {
 726 #ifndef NO_TRANSLITERATION
 727                           /* Transliterate if possible.  */
 728                           if (locale != NULL)
 729                             {
 730                               uint32_t *translit;
 731
 732                               if ((locale->avail & CTYPE_LOCALE) == 0)
 733                                 {
 734                                   /* Load the CTYPE data now.  */
 735                                   int old_needed = locale->needed;
 736
 737                                   locale->needed = 0;
 738                                   locale = load_locale (CTYPE_LOCALE,
 739                                                         locale->name,
 740                                                         locale->repertoire_name,
 741                                                         charmap, locale);
 742                                   locale->needed = old_needed;
 743                                 }
 744
 745                               if ((locale->avail & CTYPE_LOCALE) != 0
 746                                   && ((translit = find_translit (locale,
 747                                                                  charmap, wch))
 748                                       != NULL))
 749                                 /* The CTYPE data contains a matching
 750                                    transliteration.  */
 751                                 {
 752                                   int i;
 753
 754                                   for (i = 0; translit[i] != 0; ++i)
 755                                     {
 756                                       char utmp[10];
 757
 758                                       snprintf (utmp, sizeof (utmp), "U%08X",
 759                                                 translit[i]);
 760                                       seq = charmap_find_value (charmap, utmp,
 761                                                                 9);
 762                                       assert (seq != NULL);
 763                                       ADDS (seq->bytes, seq->nbytes);
 764                                     }
 765
 766                                   continue;
 767                                 }
 768                             }
 769 #endif  /* NO_TRANSLITERATION */
 770
 771                           /* Not a known name.  */
 772                           illegal_string = 1;
 773                         }
 774                     }
 775
 776                   if (seq != NULL)
 777                     ADDS (seq->bytes, seq->nbytes);
 778
 779                   continue;
 780                 }
 781             }
 782
 783           /* We now have the symbolic name in buf[startidx] to
 784              buf[bufact-1].  Now find out the value for this character
 785              in the charmap as well as in the repertoire map (in this
 786              order).  */
 787           seq = charmap_find_value (charmap, &buf[startidx],
 788                                     bufact - startidx);
 789
 790           if (seq == NULL)
 791             {
 792               /* This name is not in the charmap.  */
 793               lr_error (lr, _("symbol `%.*s' not in charmap"),
 794                         (int) (bufact - startidx), &buf[startidx]);
 795               illegal_string = 1;
 796             }
 797
 798           if (return_widestr)
 799             {
 800               /* Now the same for the multibyte representation.  */
 801               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 802                 wch = seq->ucs4;
 803               else
 804                 {
 805                   wch = repertoire_find_value (repertoire, &buf[startidx],
 806                                                bufact - startidx);
 807                   if (seq != NULL)
 808                     seq->ucs4 = wch;
 809                 }
 810
 811               if (wch == ILLEGAL_CHAR_VALUE)
 812                 {
 813                   /* This name is not in the repertoire map.  */
 814                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 815                             (int) (bufact - startidx), &buf[startidx]);
 816                   illegal_string = 1;
 817                 }
 818               else
 819                 ADDWC (wch);
 820             }
 821
 822           /* Now forget about the name we just added.  */
 823           bufact = startidx;
 824
 825           /* And copy the bytes.  */
 826           if (seq != NULL)
 827             ADDS (seq->bytes, seq->nbytes);
 828         }
 829
 830       if (ch == '\n' || ch == EOF)
 831         {
 832           lr_error (lr, _("unterminated string"));
 833           illegal_string = 1;
 834         }
 835
 836       if (illegal_string)
 837         {
 838           free (buf);
 839           if (buf2 != NULL)
 840             free (buf2);
 841           lr->token.val.str.startmb = NULL;
 842           lr->token.val.str.lenmb = 0;
 843           lr->token.val.str.startwc = NULL;
 844           lr->token.val.str.lenwc = 0;
 845
 846           return &lr->token;
 847         }
 848
 849       ADDC ('\0');
 850
 851       if (return_widestr)
 852         {
 853           ADDWC (0);
 854           lr->token.val.str.startwc = xrealloc (buf2,
 855                                                 buf2act * sizeof (uint32_t));
 856           lr->token.val.str.lenwc = buf2act;
 857         }
 858     }
 859
 860   lr->token.val.str.startmb = xrealloc (buf, bufact);
 861   lr->token.val.str.lenmb = bufact;
 862
 863   return &lr->token;
 864 }