locale/programs/linereader.c

   1 /* Copyright (C) 1996-2018 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21
  22 #include <assert.h>
  23 #include <ctype.h>
  24 #include <errno.h>
  25 #include <libintl.h>
  26 #include <stdarg.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <stdint.h>
  30
  31 #include "localedef.h"
  32 #include "charmap.h"
  33 #include "error.h"
  34 #include "linereader.h"
  35 #include "locfile.h"
  36
  37 /* Prototypes for local functions.  */
  38 static struct token *get_toplvl_escape (struct linereader *lr);
  39 static struct token *get_symname (struct linereader *lr);
  40 static struct token *get_ident (struct linereader *lr);
  41 static struct token *get_string (struct linereader *lr,
  42                                  const struct charmap_t *charmap,
  43                                  struct localedef_t *locale,
  44                                  const struct repertoire_t *repertoire,
  45                                  int verbose);
  46
  47
  48 struct linereader *
  49 lr_open (const char *fname, kw_hash_fct_t hf)
  50 {
  51   FILE *fp;
  52
  53   if (fname == NULL || strcmp (fname, "-") == 0
  54       || strcmp (fname, "/dev/stdin") == 0)
  55     return lr_create (stdin, "<stdin>", hf);
  56   else
  57     {
  58       fp = fopen (fname, "rm");
  59       if (fp == NULL)
  60         return NULL;
  61       return lr_create (fp, fname, hf);
  62     }
  63 }
  64
  65 struct linereader *
  66 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  67 {
  68   struct linereader *result;
  69   int n;
  70
  71   result = (struct linereader *) xmalloc (sizeof (*result));
  72
  73   result->fp = fp;
  74   result->fname = xstrdup (fname);
  75   result->buf = NULL;
  76   result->bufsize = 0;
  77   result->lineno = 1;
  78   result->idx = 0;
  79   result->comment_char = '#';
  80   result->escape_char = '\\';
  81   result->translate_strings = 1;
  82   result->return_widestr = 0;
  83
  84   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  85   if (n < 0)
  86     {
  87       int save = errno;
  88       fclose (result->fp);
  89       free ((char *) result->fname);
  90       free (result);
  91       errno = save;
  92       return NULL;
  93     }
  94
  95   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  96     n -= 2;
  97
  98   result->buf[n] = '\0';
  99   result->bufact = n;
 100   result->hash_fct = hf;
 101
 102   return result;
 103 }
 104
 105
 106 int
 107 lr_eof (struct linereader *lr)
 108 {
 109   return lr->bufact = 0;
 110 }
 111
 112
 113 void
 114 lr_ignore_rest (struct linereader *lr, int verbose)
 115 {
 116   if (verbose)
 117     {
 118       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
 119              && lr->buf[lr->idx] != lr->comment_char)
 120         if (lr->buf[lr->idx] == '\0')
 121           {
 122             if (lr_next (lr) < 0)
 123               return;
 124           }
 125         else
 126           ++lr->idx;
 127
 128       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
 129           && lr->buf[lr->idx] != lr->comment_char)
 130         lr_error (lr, _("trailing garbage at end of line"));
 131     }
 132
 133   /* Ignore continued line.  */
 134   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
 135     if (lr_next (lr) < 0)
 136       break;
 137
 138   lr->idx = lr->bufact;
 139 }
 140
 141
 142 void
 143 lr_close (struct linereader *lr)
 144 {
 145   fclose (lr->fp);
 146   free (lr->buf);
 147   free (lr);
 148 }
 149
 150
 151 int
 152 lr_next (struct linereader *lr)
 153 {
 154   int n;
 155
 156   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 157   if (n < 0)
 158     return -1;
 159
 160   ++lr->lineno;
 161
 162   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 163     {
 164 #if 0
 165       /* XXX Is this correct?  */
 166       /* An escaped newline character is substituted with a single <SP>.  */
 167       --n;
 168       lr->buf[n - 1] = ' ';
 169 #else
 170       n -= 2;
 171 #endif
 172     }
 173
 174   lr->buf[n] = '\0';
 175   lr->bufact = n;
 176   lr->idx = 0;
 177
 178   return 0;
 179 }
 180
 181
 182 /* Defined in error.c.  */
 183 /* This variable is incremented each time `error' is called.  */
 184 extern unsigned int error_message_count;
 185
 186 /* The calling program should define program_name and set it to the
 187    name of the executing program.  */
 188 extern char *program_name;
 189
 190
 191 struct token *
 192 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 193           struct localedef_t *locale, const struct repertoire_t *repertoire,
 194           int verbose)
 195 {
 196   int ch;
 197
 198   while (1)
 199     {
 200       do
 201         {
 202           ch = lr_getc (lr);
 203
 204           if (ch == EOF)
 205             {
 206               lr->token.tok = tok_eof;
 207               return &lr->token;
 208             };
 209
 210           if (ch == '\n')
 211             {
 212               lr->token.tok = tok_eol;
 213               return &lr->token;
 214             }
 215         }
 216       while (isspace (ch));
 217
 218       if (ch != lr->comment_char)
 219         break;
 220
 221       /* Is there an newline at the end of the buffer?  */
 222       if (lr->buf[lr->bufact - 1] != '\n')
 223         {
 224           /* No.  Some people want this to mean that only the line in
 225              the file not the logical, concatenated line is ignored.
 226              Let's try this.  */
 227           lr->idx = lr->bufact;
 228           continue;
 229         }
 230
 231       /* Ignore rest of line.  */
 232       lr_ignore_rest (lr, 0);
 233       lr->token.tok = tok_eol;
 234       return &lr->token;
 235     }
 236
 237   /* Match escape sequences.  */
 238   if (ch == lr->escape_char)
 239     return get_toplvl_escape (lr);
 240
 241   /* Match ellipsis.  */
 242   if (ch == '.')
 243     {
 244       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 245         {
 246           int cnt;
 247           for (cnt = 0; cnt < 10; ++cnt)
 248             lr_getc (lr);
 249           lr->token.tok = tok_ellipsis4_2;
 250           return &lr->token;
 251         }
 252       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 253         {
 254           lr_getc (lr);
 255           lr_getc (lr);
 256           lr_getc (lr);
 257           lr->token.tok = tok_ellipsis4;
 258           return &lr->token;
 259         }
 260       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 261         {
 262           lr_getc (lr);
 263           lr_getc (lr);
 264           lr->token.tok = tok_ellipsis3;
 265           return &lr->token;
 266         }
 267       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 268         {
 269           int cnt;
 270           for (cnt = 0; cnt < 6; ++cnt)
 271             lr_getc (lr);
 272           lr->token.tok = tok_ellipsis2_2;
 273           return &lr->token;
 274         }
 275       if (lr->buf[lr->idx] == '.')
 276         {
 277           lr_getc (lr);
 278           lr->token.tok = tok_ellipsis2;
 279           return &lr->token;
 280         }
 281     }
 282
 283   switch (ch)
 284     {
 285     case '<':
 286       return get_symname (lr);
 287
 288     case '0' ... '9':
 289       lr->token.tok = tok_number;
 290       lr->token.val.num = ch - '0';
 291
 292       while (isdigit (ch = lr_getc (lr)))
 293         {
 294           lr->token.val.num *= 10;
 295           lr->token.val.num += ch - '0';
 296         }
 297       if (isalpha (ch))
 298         lr_error (lr, _("garbage at end of number"));
 299       lr_ungetn (lr, 1);
 300
 301       return &lr->token;
 302
 303     case ';':
 304       lr->token.tok = tok_semicolon;
 305       return &lr->token;
 306
 307     case ',':
 308       lr->token.tok = tok_comma;
 309       return &lr->token;
 310
 311     case '(':
 312       lr->token.tok = tok_open_brace;
 313       return &lr->token;
 314
 315     case ')':
 316       lr->token.tok = tok_close_brace;
 317       return &lr->token;
 318
 319     case '"':
 320       return get_string (lr, charmap, locale, repertoire, verbose);
 321
 322     case '-':
 323       ch = lr_getc (lr);
 324       if (ch == '1')
 325         {
 326           lr->token.tok = tok_minus1;
 327           return &lr->token;
 328         }
 329       lr_ungetn (lr, 2);
 330       break;
 331     }
 332
 333   return get_ident (lr);
 334 }
 335
 336
 337 static struct token *
 338 get_toplvl_escape (struct linereader *lr)
 339 {
 340   /* This is supposed to be a numeric value.  We return the
 341      numerical value and the number of bytes.  */
 342   size_t start_idx = lr->idx - 1;
 343   unsigned char *bytes = lr->token.val.charcode.bytes;
 344   size_t nbytes = 0;
 345   int ch;
 346
 347   do
 348     {
 349       unsigned int byte = 0;
 350       unsigned int base = 8;
 351
 352       ch = lr_getc (lr);
 353
 354       if (ch == 'd')
 355         {
 356           base = 10;
 357           ch = lr_getc (lr);
 358         }
 359       else if (ch == 'x')
 360         {
 361           base = 16;
 362           ch = lr_getc (lr);
 363         }
 364
 365       if ((base == 16 && !isxdigit (ch))
 366           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 367         {
 368         esc_error:
 369           lr->token.val.str.startmb = &lr->buf[start_idx];
 370
 371           while (ch != EOF && !isspace (ch))
 372             ch = lr_getc (lr);
 373           lr->token.val.str.lenmb = lr->idx - start_idx;
 374
 375           lr->token.tok = tok_error;
 376           return &lr->token;
 377         }
 378
 379       if (isdigit (ch))
 380         byte = ch - '0';
 381       else
 382         byte = tolower (ch) - 'a' + 10;
 383
 384       ch = lr_getc (lr);
 385       if ((base == 16 && !isxdigit (ch))
 386           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 387         goto esc_error;
 388
 389       byte *= base;
 390       if (isdigit (ch))
 391         byte += ch - '0';
 392       else
 393         byte += tolower (ch) - 'a' + 10;
 394
 395       ch = lr_getc (lr);
 396       if (base != 16 && isdigit (ch))
 397         {
 398           byte *= base;
 399           byte += ch - '0';
 400
 401           ch = lr_getc (lr);
 402         }
 403
 404       bytes[nbytes++] = byte;
 405     }
 406   while (ch == lr->escape_char
 407          && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
 408
 409   if (!isspace (ch))
 410     lr_error (lr, _("garbage at end of character code specification"));
 411
 412   lr_ungetn (lr, 1);
 413
 414   lr->token.tok = tok_charcode;
 415   lr->token.val.charcode.nbytes = nbytes;
 416
 417   return &lr->token;
 418 }
 419
 420
 /* Append the single byte CH to the growing byte buffer of the
    enclosing function.  The macro works on three local variables of
    the caller: `buf' (the storage), `bufact' (number of bytes in use)
    and `bufmax' (allocated size).  The capacity is doubled whenever
    the buffer is full.  */
#define ADDC(ch) \
  do                                                                          \
    {                                                                         \
      if (bufmax == bufact)                                                   \
        {                                                                     \
          bufmax += bufmax;                                                   \
          buf = xrealloc (buf, bufmax);                                       \
        }                                                                     \
      buf[bufact] = (ch);                                                     \
      ++bufact;                                                               \
    }                                                                         \
  while (0)
 432
 433
 /* Append the L bytes starting at S to the growing byte buffer of the
    enclosing function.  The macro works on three local variables of
    the caller: `buf' (the storage), `bufact' (number of bytes in use)
    and `bufmax' (allocated size).

    If the bytes do not fit, the capacity is first raised to at least L
    and then doubled.  Since `bufact' never exceeds the old capacity
    this always leaves room for the existing content plus the new
    bytes.  The earlier version assigned L to `bufact' instead, which
    moved the write position and could copy past the allocation.  */
#define ADDS(s, l) \
  do                                                                          \
    {                                                                         \
      size_t _l = (l);                                                        \
      if (bufact + _l > bufmax)                                               \
        {                                                                     \
          if (bufmax < _l)                                                    \
            bufmax = _l;                                                      \
          bufmax *= 2;                                                        \
          buf = xrealloc (buf, bufmax);                                       \
        }                                                                     \
      memcpy (&buf[bufact], s, _l);                                           \
      bufact += _l;                                                           \
    }                                                                         \
  while (0)
 449
 450
 /* Append the wide character CH to the growing wide character buffer
    of the enclosing function.  The macro works on three local
    variables of the caller: `buf2' (the storage), `buf2act' (number of
    elements in use) and `buf2max' (capacity, counted in elements, not
    bytes).  The capacity is doubled whenever the buffer is full; the
    allocation size is derived from the element type of `buf2' rather
    than from a hard-coded element size of 4.  */
#define ADDWC(ch) \
  do                                                                          \
    {                                                                         \
      if (buf2act == buf2max)                                                 \
        {                                                                     \
          buf2max *= 2;                                                       \
          buf2 = xrealloc (buf2, buf2max * sizeof (*buf2));                   \
        }                                                                     \
      buf2[buf2act++] = (ch);                                                 \
    }                                                                         \
  while (0)
 462
 463
 464 static struct token *
 465 get_symname (struct linereader *lr)
 466 {
 467   /* Symbol in brackets.  We must distinguish three kinds:
 468      1. reserved words
 469      2. ISO 10646 position values
 470      3. all other.  */
 471   char *buf;
 472   size_t bufact = 0;
 473   size_t bufmax = 56;
 474   const struct keyword_t *kw;
 475   int ch;
 476
 477   buf = (char *) xmalloc (bufmax);
 478
 479   do
 480     {
 481       ch = lr_getc (lr);
 482       if (ch == lr->escape_char)
 483         {
 484           int c2 = lr_getc (lr);
 485           ADDC (c2);
 486
 487           if (c2 == '\n')
 488             ch = '\n';
 489         }
 490       else
 491         ADDC (ch);
 492     }
 493   while (ch != '>' && ch != '\n');
 494
 495   if (ch == '\n')
 496     lr_error (lr, _("unterminated symbolic name"));
 497
 498   /* Test for ISO 10646 position value.  */
 499   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 500     {
 501       char *cp = buf + 1;
 502       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 503         ++cp;
 504
 505       if (cp == &buf[bufact - 1])
 506         {
 507           /* Yes, it is.  */
 508           lr->token.tok = tok_ucs4;
 509           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 510
 511           return &lr->token;
 512         }
 513     }
 514
 515   /* It is a symbolic name.  Test for reserved words.  */
 516   kw = lr->hash_fct (buf, bufact - 1);
 517
 518   if (kw != NULL && kw->symname_or_ident == 1)
 519     {
 520       lr->token.tok = kw->token;
 521       free (buf);
 522     }
 523   else
 524     {
 525       lr->token.tok = tok_bsymbol;
 526
 527       buf = xrealloc (buf, bufact + 1);
 528       buf[bufact] = '\0';
 529
 530       lr->token.val.str.startmb = buf;
 531       lr->token.val.str.lenmb = bufact - 1;
 532     }
 533
 534   return &lr->token;
 535 }
 536
 537
 538 static struct token *
 539 get_ident (struct linereader *lr)
 540 {
 541   char *buf;
 542   size_t bufact;
 543   size_t bufmax = 56;
 544   const struct keyword_t *kw;
 545   int ch;
 546
 547   buf = xmalloc (bufmax);
 548   bufact = 0;
 549
 550   ADDC (lr->buf[lr->idx - 1]);
 551
 552   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 553          && ch != '<' && ch != ',' && ch != EOF)
 554     {
 555       if (ch == lr->escape_char)
 556         {
 557           ch = lr_getc (lr);
 558           if (ch == '\n' || ch == EOF)
 559             {
 560               lr_error (lr, _("invalid escape sequence"));
 561               break;
 562             }
 563         }
 564       ADDC (ch);
 565     }
 566
 567   lr_ungetc (lr, ch);
 568
 569   kw = lr->hash_fct (buf, bufact);
 570
 571   if (kw != NULL && kw->symname_or_ident == 0)
 572     {
 573       lr->token.tok = kw->token;
 574       free (buf);
 575     }
 576   else
 577     {
 578       lr->token.tok = tok_ident;
 579
 580       buf = xrealloc (buf, bufact + 1);
 581       buf[bufact] = '\0';
 582
 583       lr->token.val.str.startmb = buf;
 584       lr->token.val.str.lenmb = bufact;
 585     }
 586
 587   return &lr->token;
 588 }
 589
 590
 591 static struct token *
 592 get_string (struct linereader *lr, const struct charmap_t *charmap,
 593             struct localedef_t *locale, const struct repertoire_t *repertoire,
 594             int verbose)
 595 {
 596   int return_widestr = lr->return_widestr;
 597   char *buf;
 598   wchar_t *buf2 = NULL;
 599   size_t bufact;
 600   size_t bufmax = 56;
 601
 602   /* We must return two different strings.  */
 603   buf = xmalloc (bufmax);
 604   bufact = 0;
 605
 606   /* We know it'll be a string.  */
 607   lr->token.tok = tok_string;
 608
 609   /* If we need not translate the strings (i.e., expand <...> parts)
 610      we can run a simple loop.  */
 611   if (!lr->translate_strings)
 612     {
 613       int ch;
 614
 615       buf2 = NULL;
 616       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 617         ADDC (ch);
 618
 619       /* Catch errors with trailing escape character.  */
 620       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 621           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 622         {
 623           lr_error (lr, _("illegal escape sequence at end of string"));
 624           --bufact;
 625         }
 626       else if (ch == '\n' || ch == EOF)
 627         lr_error (lr, _("unterminated string"));
 628
 629       ADDC ('\0');
 630     }
 631   else
 632     {
 633       int illegal_string = 0;
 634       size_t buf2act = 0;
 635       size_t buf2max = 56 * sizeof (uint32_t);
 636       int ch;
 637
 638       /* We have to provide the wide character result as well.  */
 639       if (return_widestr)
 640         buf2 = xmalloc (buf2max);
 641
 642       /* Read until the end of the string (or end of the line or file).  */
 643       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 644         {
 645           size_t startidx;
 646           uint32_t wch;
 647           struct charseq *seq;
 648
 649           if (ch != '<')
 650             {
 651               /* The standards leave it up to the implementation to decide
 652                  what to do with character which stand for themself.  We
 653                  could jump through hoops to find out the value relative to
 654                  the charmap and the repertoire map, but instead we leave
 655                  it up to the locale definition author to write a better
 656                  definition.  We assume here that every character which
 657                  stands for itself is encoded using ISO 8859-1.  Using the
 658                  escape character is allowed.  */
 659               if (ch == lr->escape_char)
 660                 {
 661                   ch = lr_getc (lr);
 662                   if (ch == '\n' || ch == EOF)
 663                     break;
 664                 }
 665
 666               ADDC (ch);
 667               if (return_widestr)
 668                 ADDWC ((uint32_t) ch);
 669
 670               continue;
 671             }
 672
 673           /* Now we have to search for the end of the symbolic name, i.e.,
 674              the closing '>'.  */
 675           startidx = bufact;
 676           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 677             {
 678               if (ch == lr->escape_char)
 679                 {
 680                   ch = lr_getc (lr);
 681                   if (ch == '\n' || ch == EOF)
 682                     break;
 683                 }
 684               ADDC (ch);
 685             }
 686           if (ch == '\n' || ch == EOF)
 687             /* Not a correct string.  */
 688             break;
 689           if (bufact == startidx)
 690             {
 691               /* <> is no correct name.  Ignore it and also signal an
 692                  error.  */
 693               illegal_string = 1;
 694               continue;
 695             }
 696
 697           /* It might be a Uxxxx symbol.  */
 698           if (buf[startidx] == 'U'
 699               && (bufact - startidx == 5 || bufact - startidx == 9))
 700             {
 701               char *cp = buf + startidx + 1;
 702               while (cp < &buf[bufact] && isxdigit (*cp))
 703                 ++cp;
 704
 705               if (cp == &buf[bufact])
 706                 {
 707                   char utmp[10];
 708
 709                   /* Yes, it is.  */
 710                   ADDC ('\0');
 711                   wch = strtoul (buf + startidx + 1, NULL, 16);
 712
 713                   /* Now forget about the name we just added.  */
 714                   bufact = startidx;
 715
 716                   if (return_widestr)
 717                     ADDWC (wch);
 718
 719                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 720                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 721                   seq = charmap_find_value (charmap, utmp, 9);
 722
 723                   if (seq == NULL)
 724                     {
 725                      /* No, this isn't the case.  Now determine from
 726                         the repertoire the name of the character and
 727                         find it in the charmap.  */
 728                       if (repertoire != NULL)
 729                         {
 730                           const char *symbol;
 731
 732                           symbol = repertoire_find_symbol (repertoire, wch);
 733
 734                           if (symbol != NULL)
 735                             seq = charmap_find_value (charmap, symbol,
 736                                                       strlen (symbol));
 737                         }
 738
 739                       if (seq == NULL)
 740                         {
 741 #ifndef NO_TRANSLITERATION
 742                           /* Transliterate if possible.  */
 743                           if (locale != NULL)
 744                             {
 745                               uint32_t *translit;
 746
 747                               if ((locale->avail & CTYPE_LOCALE) == 0)
 748                                 {
 749                                   /* Load the CTYPE data now.  */
 750                                   int old_needed = locale->needed;
 751
 752                                   locale->needed = 0;
 753                                   locale = load_locale (LC_CTYPE,
 754                                                         locale->name,
 755                                                         locale->repertoire_name,
 756                                                         charmap, locale);
 757                                   locale->needed = old_needed;
 758                                 }
 759
 760                               if ((locale->avail & CTYPE_LOCALE) != 0
 761                                   && ((translit = find_translit (locale,
 762                                                                  charmap, wch))
 763                                       != NULL))
 764                                 /* The CTYPE data contains a matching
 765                                    transliteration.  */
 766                                 {
 767                                   int i;
 768
 769                                   for (i = 0; translit[i] != 0; ++i)
 770                                     {
 771                                       char utmp[10];
 772
 773                                       snprintf (utmp, sizeof (utmp), "U%08X",
 774                                                 translit[i]);
 775                                       seq = charmap_find_value (charmap, utmp,
 776                                                                 9);
 777                                       assert (seq != NULL);
 778                                       ADDS (seq->bytes, seq->nbytes);
 779                                     }
 780
 781                                   continue;
 782                                 }
 783                             }
 784 #endif  /* NO_TRANSLITERATION */
 785
 786                           /* Not a known name.  */
 787                           illegal_string = 1;
 788                         }
 789                     }
 790
 791                   if (seq != NULL)
 792                     ADDS (seq->bytes, seq->nbytes);
 793
 794                   continue;
 795                 }
 796             }
 797
 798           /* We now have the symbolic name in buf[startidx] to
 799              buf[bufact-1].  Now find out the value for this character
 800              in the charmap as well as in the repertoire map (in this
 801              order).  */
 802           seq = charmap_find_value (charmap, &buf[startidx],
 803                                     bufact - startidx);
 804
 805           if (seq == NULL)
 806             {
 807               /* This name is not in the charmap.  */
 808               lr_error (lr, _("symbol `%.*s' not in charmap"),
 809                         (int) (bufact - startidx), &buf[startidx]);
 810               illegal_string = 1;
 811             }
 812
 813           if (return_widestr)
 814             {
 815               /* Now the same for the multibyte representation.  */
 816               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 817                 wch = seq->ucs4;
 818               else
 819                 {
 820                   wch = repertoire_find_value (repertoire, &buf[startidx],
 821                                                bufact - startidx);
 822                   if (seq != NULL)
 823                     seq->ucs4 = wch;
 824                 }
 825
 826               if (wch == ILLEGAL_CHAR_VALUE)
 827                 {
 828                   /* This name is not in the repertoire map.  */
 829                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 830                             (int) (bufact - startidx), &buf[startidx]);
 831                   illegal_string = 1;
 832                 }
 833               else
 834                 ADDWC (wch);
 835             }
 836
 837           /* Now forget about the name we just added.  */
 838           bufact = startidx;
 839
 840           /* And copy the bytes.  */
 841           if (seq != NULL)
 842             ADDS (seq->bytes, seq->nbytes);
 843         }
 844
 845       if (ch == '\n' || ch == EOF)
 846         {
 847           lr_error (lr, _("unterminated string"));
 848           illegal_string = 1;
 849         }
 850
 851       if (illegal_string)
 852         {
 853           free (buf);
 854           free (buf2);
 855           lr->token.val.str.startmb = NULL;
 856           lr->token.val.str.lenmb = 0;
 857           lr->token.val.str.startwc = NULL;
 858           lr->token.val.str.lenwc = 0;
 859
 860           return &lr->token;
 861         }
 862
 863       ADDC ('\0');
 864
 865       if (return_widestr)
 866         {
 867           ADDWC (0);
 868           lr->token.val.str.startwc = xrealloc (buf2,
 869                                                 buf2act * sizeof (uint32_t));
 870           lr->token.val.str.lenwc = buf2act;
 871         }
 872     }
 873
 874   lr->token.val.str.startmb = xrealloc (buf, bufact);
 875   lr->token.val.str.lenmb = bufact;
 876
 877   return &lr->token;
 878 }