locale/programs/linereader.c

   1 /* Copyright (C) 1996,1997,1998,1999,2000,2001 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <ctype.h>
  25 #include <errno.h>
  26 #include <libintl.h>
  27 #include <stdarg.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "charmap.h"
  32 #include "error.h"
  33 #include "linereader.h"
  34
  35 /* Prototypes for a few program-wide used functions.  */
  36 extern void *xmalloc (size_t __n);
  37 extern void *xrealloc (void *__p, size_t __n);
  38 extern char *xstrdup (const char *__str);
  39
  40
  41 /* Prototypes for local functions.  */
  42 static struct token *get_toplvl_escape (struct linereader *lr);
  43 static struct token *get_symname (struct linereader *lr);
  44 static struct token *get_ident (struct linereader *lr);
  45 static struct token *get_string (struct linereader *lr,
  46                                  const struct charmap_t *charmap,
  47                                  const struct repertoire_t *repertoire,
  48                                  int verbose);
  49
  50
  51 struct linereader *
  52 lr_open (const char *fname, kw_hash_fct_t hf)
  53 {
  54   FILE *fp;
  55
  56   if (fname == NULL || strcmp (fname, "-") == 0
  57       || strcmp (fname, "/dev/stdin") == 0)
  58     return lr_create (stdin, "<stdin>", hf);
  59   else
  60     {
  61       fp = fopen (fname, "r");
  62       if (fp == NULL)
  63         return NULL;
  64       return lr_create (fp, fname, hf);
  65     }
  66 }
  67
  68 struct linereader *
  69 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  70 {
  71   struct linereader *result;
  72   int n;
  73
  74   result = (struct linereader *) xmalloc (sizeof (*result));
  75
  76   result->fp = fp;
  77   result->fname = xstrdup (fname);
  78   result->buf = NULL;
  79   result->bufsize = 0;
  80   result->lineno = 1;
  81   result->idx = 0;
  82   result->comment_char = '#';
  83   result->escape_char = '\\';
  84   result->translate_strings = 1;
  85
  86   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  87   if (n < 0)
  88     {
  89       int save = errno;
  90       fclose (result->fp);
  91       free ((char *) result->fname);
  92       free (result);
  93       errno = save;
  94       return NULL;
  95     }
  96
  97   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  98     n -= 2;
  99
 100   result->buf[n] = '\0';
 101   result->bufact = n;
 102   result->hash_fct = hf;
 103
 104   return result;
 105 }
 106
 107
 108 int
 109 lr_eof (struct linereader *lr)
 110 {
 111   return lr->bufact = 0;
 112 }
 113
 114
 115 void
 116 lr_close (struct linereader *lr)
 117 {
 118   fclose (lr->fp);
 119   free (lr->buf);
 120   free (lr);
 121 }
 122
 123
 124 int
 125 lr_next (struct linereader *lr)
 126 {
 127   int n;
 128
 129   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 130   if (n < 0)
 131     return -1;
 132
 133   ++lr->lineno;
 134
 135   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 136     {
 137 #if 0
 138       /* XXX Is this correct?  */
 139       /* An escaped newline character is substituted with a single <SP>.  */
 140       --n;
 141       lr->buf[n - 1] = ' ';
 142 #else
 143       n -= 2;
 144 #endif
 145     }
 146
 147   lr->buf[n] = '\0';
 148   lr->bufact = n;
 149   lr->idx = 0;
 150
 151   return 0;
 152 }
 153
 154
 155 /* Defined in error.c.  */
 156 /* This variable is incremented each time `error' is called.  */
 157 extern unsigned int error_message_count;
 158
 159 /* The calling program should define program_name and set it to the
 160    name of the executing program.  */
 161 extern char *program_name;
 162
 163
 164 struct token *
 165 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 166           const struct repertoire_t *repertoire, int verbose)
 167 {
 168   int ch;
 169
 170   while (1)
 171     {
 172       do
 173         {
 174           ch = lr_getc (lr);
 175
 176           if (ch == EOF)
 177             {
 178               lr->token.tok = tok_eof;
 179               return &lr->token;
 180             };
 181
 182           if (ch == '\n')
 183             {
 184               lr->token.tok = tok_eol;
 185               return &lr->token;
 186             }
 187         }
 188       while (isspace (ch));
 189
 190       if (ch == EOF)
 191         {
 192           lr->token.tok = tok_eof;
 193           return &lr->token;
 194         };
 195
 196       if (ch != lr->comment_char)
 197         break;
 198
 199       /* Is there an newline at the end of the buffer?  */
 200       if (lr->buf[lr->bufact - 1] != '\n')
 201         {
 202           /* No.  Some people want this to mean that only the line in
 203              the file not the logical, concatenated line is ignored.
 204              Let's try this.  */
 205           lr->idx = lr->bufact;
 206           continue;
 207         }
 208
 209       /* Ignore rest of line.  */
 210       lr_ignore_rest (lr, 0);
 211       lr->token.tok = tok_eol;
 212       return &lr->token;
 213     }
 214
 215   /* Match escape sequences.  */
 216   if (ch == lr->escape_char)
 217     return get_toplvl_escape (lr);
 218
 219   /* Match ellipsis.  */
 220   if (ch == '.')
 221     {
 222       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 223         {
 224           int cnt;
 225           for (cnt = 0; cnt < 10; ++cnt)
 226             lr_getc (lr);
 227           lr->token.tok = tok_ellipsis4_2;
 228           return &lr->token;
 229         }
 230       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 231         {
 232           lr_getc (lr);
 233           lr_getc (lr);
 234           lr_getc (lr);
 235           lr->token.tok = tok_ellipsis4;
 236           return &lr->token;
 237         }
 238       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 239         {
 240           lr_getc (lr);
 241           lr_getc (lr);
 242           lr->token.tok = tok_ellipsis3;
 243           return &lr->token;
 244         }
 245       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 246         {
 247           int cnt;
 248           for (cnt = 0; cnt < 6; ++cnt)
 249             lr_getc (lr);
 250           lr->token.tok = tok_ellipsis2_2;
 251           return &lr->token;
 252         }
 253       if (lr->buf[lr->idx] == '.')
 254         {
 255           lr_getc (lr);
 256           lr->token.tok = tok_ellipsis2;
 257           return &lr->token;
 258         }
 259     }
 260
 261   switch (ch)
 262     {
 263     case '<':
 264       return get_symname (lr);
 265
 266     case '0' ... '9':
 267       lr->token.tok = tok_number;
 268       lr->token.val.num = ch - '0';
 269
 270       while (isdigit (ch = lr_getc (lr)))
 271         {
 272           lr->token.val.num *= 10;
 273           lr->token.val.num += ch - '0';
 274         }
 275       if (isalpha (ch))
 276         lr_error (lr, _("garbage at end of number"));
 277       lr_ungetn (lr, 1);
 278
 279       return &lr->token;
 280
 281     case ';':
 282       lr->token.tok = tok_semicolon;
 283       return &lr->token;
 284
 285     case ',':
 286       lr->token.tok = tok_comma;
 287       return &lr->token;
 288
 289     case '(':
 290       lr->token.tok = tok_open_brace;
 291       return &lr->token;
 292
 293     case ')':
 294       lr->token.tok = tok_close_brace;
 295       return &lr->token;
 296
 297     case '"':
 298       return get_string (lr, charmap, repertoire, verbose);
 299
 300     case '-':
 301       ch = lr_getc (lr);
 302       if (ch == '1')
 303         {
 304           lr->token.tok = tok_minus1;
 305           return &lr->token;
 306         }
 307       lr_ungetn (lr, 2);
 308       break;
 309     }
 310
 311   return get_ident (lr);
 312 }
 313
 314
 315 static struct token *
 316 get_toplvl_escape (struct linereader *lr)
 317 {
 318   /* This is supposed to be a numeric value.  We return the
 319      numerical value and the number of bytes.  */
 320   size_t start_idx = lr->idx - 1;
 321   char *bytes = lr->token.val.charcode.bytes;
 322   int nbytes = 0;
 323   int ch;
 324
 325   do
 326     {
 327       unsigned int byte = 0;
 328       unsigned int base = 8;
 329
 330       ch = lr_getc (lr);
 331
 332       if (ch == 'd')
 333         {
 334           base = 10;
 335           ch = lr_getc (lr);
 336         }
 337       else if (ch == 'x')
 338         {
 339           base = 16;
 340           ch = lr_getc (lr);
 341         }
 342
 343       if ((base == 16 && !isxdigit (ch))
 344           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 345         {
 346         esc_error:
 347           lr->token.val.str.startmb = &lr->buf[start_idx];
 348
 349           while (ch != EOF && !isspace (ch))
 350             ch = lr_getc (lr);
 351           lr->token.val.str.lenmb = lr->idx - start_idx;
 352
 353           lr->token.tok = tok_error;
 354           return &lr->token;
 355         }
 356
 357       if (isdigit (ch))
 358         byte = ch - '0';
 359       else
 360         byte = tolower (ch) - 'a' + 10;
 361
 362       ch = lr_getc (lr);
 363       if ((base == 16 && !isxdigit (ch))
 364           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 365         goto esc_error;
 366
 367       byte *= base;
 368       if (isdigit (ch))
 369         byte += ch - '0';
 370       else
 371         byte += tolower (ch) - 'a' + 10;
 372
 373       ch = lr_getc (lr);
 374       if (base != 16 && isdigit (ch))
 375         {
 376           byte *= base;
 377           byte += ch - '0';
 378
 379           ch = lr_getc (lr);
 380         }
 381
 382       bytes[nbytes++] = byte;
 383     }
 384   while (ch == lr->escape_char
 385          && nbytes < sizeof (lr->token.val.charcode.bytes));
 386
 387   if (!isspace (ch))
 388     lr_error (lr, _("garbage at end of character code specification"));
 389
 390   lr_ungetn (lr, 1);
 391
 392   lr->token.tok = tok_charcode;
 393   lr->token.val.charcode.nbytes = nbytes;
 394
 395   return &lr->token;
 396 }
 397
 398
 399 #define ADDC(ch) \
 400   do                                                                          \
 401     {                                                                         \
 402       if (bufact == bufmax)                                                   \
 403         {                                                                     \
 404           bufmax *= 2;                                                        \
 405           buf = xrealloc (buf, bufmax);                                       \
 406         }                                                                     \
 407       buf[bufact++] = (ch);                                                   \
 408     }                                                                         \
 409   while (0)
 410
 411
 412 #define ADDS(s, l) \
 413   do                                                                          \
 414     {                                                                         \
 415       size_t _l = (l);                                                        \
 416       if (bufact + _l > bufmax)                                               \
 417         {                                                                     \
 418           if (bufact < _l)                                                    \
 419             bufact = _l;                                                      \
 420           bufmax *= 2;                                                        \
 421           buf = xrealloc (buf, bufmax);                                       \
 422         }                                                                     \
 423       memcpy (&buf[bufact], s, _l);                                           \
 424       bufact += _l;                                                           \
 425     }                                                                         \
 426   while (0)
 427
 428
 429 #define ADDWC(ch) \
 430   do                                                                          \
 431     {                                                                         \
 432       if (buf2act == buf2max)                                                 \
 433         {                                                                     \
 434           buf2max *= 2;                                                       \
 435           buf2 = xrealloc (buf2, buf2max * 4);                                \
 436         }                                                                     \
 437       buf2[buf2act++] = (ch);                                                 \
 438     }                                                                         \
 439   while (0)
 440
 441
 442 static struct token *
 443 get_symname (struct linereader *lr)
 444 {
 445   /* Symbol in brackets.  We must distinguish three kinds:
 446      1. reserved words
 447      2. ISO 10646 position values
 448      3. all other.  */
 449   char *buf;
 450   size_t bufact = 0;
 451   size_t bufmax = 56;
 452   const struct keyword_t *kw;
 453   int ch;
 454
 455   buf = (char *) xmalloc (bufmax);
 456
 457   do
 458     {
 459       ch = lr_getc (lr);
 460       if (ch == lr->escape_char)
 461         {
 462           int c2 = lr_getc (lr);
 463           ADDC (c2);
 464
 465           if (c2 == '\n')
 466             ch = '\n';
 467         }
 468       else
 469         ADDC (ch);
 470     }
 471   while (ch != '>' && ch != '\n');
 472
 473   if (ch == '\n')
 474     lr_error (lr, _("unterminated symbolic name"));
 475
 476   /* Test for ISO 10646 position value.  */
 477   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 478     {
 479       char *cp = buf + 1;
 480       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 481         ++cp;
 482
 483       if (cp == &buf[bufact - 1])
 484         {
 485           /* Yes, it is.  */
 486           lr->token.tok = tok_ucs4;
 487           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 488
 489           return &lr->token;
 490         }
 491     }
 492
 493   /* It is a symbolic name.  Test for reserved words.  */
 494   kw = lr->hash_fct (buf, bufact - 1);
 495
 496   if (kw != NULL && kw->symname_or_ident == 1)
 497     {
 498       lr->token.tok = kw->token;
 499       free (buf);
 500     }
 501   else
 502     {
 503       lr->token.tok = tok_bsymbol;
 504
 505       buf[bufact] = '\0';
 506       buf = xrealloc (buf, bufact + 1);
 507
 508       lr->token.val.str.startmb = buf;
 509       lr->token.val.str.lenmb = bufact - 1;
 510     }
 511
 512   return &lr->token;
 513 }
 514
 515
 516 static struct token *
 517 get_ident (struct linereader *lr)
 518 {
 519   char *buf;
 520   size_t bufact;
 521   size_t bufmax = 56;
 522   const struct keyword_t *kw;
 523   int ch;
 524
 525   buf = xmalloc (bufmax);
 526   bufact = 0;
 527
 528   ADDC (lr->buf[lr->idx - 1]);
 529
 530   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 531          && ch != '<' && ch != ',' && ch != EOF)
 532     {
 533       if (ch == lr->escape_char)
 534         {
 535           ch = lr_getc (lr);
 536           if (ch == '\n' || ch == EOF)
 537             {
 538               lr_error (lr, _("invalid escape sequence"));
 539               break;
 540             }
 541         }
 542       ADDC (ch);
 543     }
 544
 545   lr_ungetc (lr, ch);
 546
 547   kw = lr->hash_fct (buf, bufact);
 548
 549   if (kw != NULL && kw->symname_or_ident == 0)
 550     {
 551       lr->token.tok = kw->token;
 552       free (buf);
 553     }
 554   else
 555     {
 556       lr->token.tok = tok_ident;
 557
 558       buf[bufact] = '\0';
 559       buf = xrealloc (buf, bufact + 1);
 560
 561       lr->token.val.str.startmb = buf;
 562       lr->token.val.str.lenmb = bufact;
 563     }
 564
 565   return &lr->token;
 566 }
 567
 568
 569 static struct token *
 570 get_string (struct linereader *lr, const struct charmap_t *charmap,
 571             const struct repertoire_t *repertoire, int verbose)
 572 {
 573   int return_widestr = lr->return_widestr;
 574   char *buf;
 575   wchar_t *buf2 = NULL;
 576   size_t bufact;
 577   size_t bufmax = 56;
 578
 579   /* We must return two different strings.  */
 580   buf = xmalloc (bufmax);
 581   bufact = 0;
 582
 583   /* We know it'll be a string.  */
 584   lr->token.tok = tok_string;
 585
 586   /* If we need not translate the strings (i.e., expand <...> parts)
 587      we can run a simple loop.  */
 588   if (!lr->translate_strings)
 589     {
 590       int ch;
 591
 592       buf2 = NULL;
 593       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 594         ADDC (ch);
 595
 596       /* Catch errors with trailing escape character.  */
 597       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 598           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 599         {
 600           lr_error (lr, _("illegal escape sequence at end of string"));
 601           --bufact;
 602         }
 603       else if (ch == '\n' || ch == EOF)
 604         lr_error (lr, _("unterminated string"));
 605
 606       ADDC ('\0');
 607     }
 608   else
 609     {
 610       int illegal_string = 0;
 611       size_t buf2act = 0;
 612       size_t buf2max = 56 * sizeof (uint32_t);
 613       int ch;
 614       int warned = 0;
 615
 616       /* We have to provide the wide character result as well.  */
 617       if (return_widestr)
 618         buf2 = xmalloc (buf2max);
 619
 620       /* Read until the end of the string (or end of the line or file).  */
 621       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 622         {
 623           size_t startidx;
 624           uint32_t wch;
 625           struct charseq *seq;
 626
 627           if (ch != '<')
 628             {
 629               /* The standards leave it up to the implementation to decide
 630                  what to do with character which stand for themself.  We
 631                  could jump through hoops to find out the value relative to
 632                  the charmap and the repertoire map, but instead we leave
 633                  it up to the locale definition author to write a better
 634                  definition.  We assume here that every character which
 635                  stands for itself is encoded using ISO 8859-1.  Using the
 636                  escape character is allowed.  */
 637               if (ch == lr->escape_char)
 638                 {
 639                   ch = lr_getc (lr);
 640                   if (ch == '\n' || ch == EOF)
 641                     break;
 642                 }
 643
 644               if (verbose && !warned)
 645                 {
 646                   lr_error (lr, _("\
 647 non-symbolic character value should not be used"));
 648                   warned = 1;
 649                 }
 650
 651               ADDC (ch);
 652               if (return_widestr)
 653                 ADDWC ((uint32_t) ch);
 654
 655               continue;
 656             }
 657
 658           /* Now we have to search for the end of the symbolic name, i.e.,
 659              the closing '>'.  */
 660           startidx = bufact;
 661           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 662             {
 663               if (ch == lr->escape_char)
 664                 {
 665                   ch = lr_getc (lr);
 666                   if (ch == '\n' || ch == EOF)
 667                     break;
 668                 }
 669               ADDC (ch);
 670             }
 671           if (ch == '\n' || ch == EOF)
 672             /* Not a correct string.  */
 673             break;
 674           if (bufact == startidx)
 675             {
 676               /* <> is no correct name.  Ignore it and also signal an
 677                  error.  */
 678               illegal_string = 1;
 679               continue;
 680             }
 681
 682           /* It might be a Uxxxx symbol.  */
 683           if (buf[startidx] == 'U'
 684               && (bufact - startidx == 5 || bufact - startidx == 9))
 685             {
 686               char *cp = buf + startidx + 1;
 687               while (cp < &buf[bufact] && isxdigit (*cp))
 688                 ++cp;
 689
 690               if (cp == &buf[bufact])
 691                 {
 692                   char utmp[10];
 693                   const char *symbol = NULL;
 694
 695                   /* Yes, it is.  */
 696                   ADDC ('\0');
 697                   wch = strtoul (buf + startidx + 1, NULL, 16);
 698
 699                   /* Now forget about the name we just added.  */
 700                   bufact = startidx;
 701
 702                   if (return_widestr)
 703                     ADDWC (wch);
 704
 705                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 706                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 707                   seq = charmap_find_value (charmap, utmp, 9);
 708
 709                   if (seq == NULL)
 710                     {
 711                      /* No, this isn't the case.  Now determine from
 712                         the repertoire the name of the character and
 713                         find it in the charmap.  */
 714                       if (repertoire != NULL)
 715                         symbol = repertoire_find_symbol (repertoire, wch);
 716
 717                       if (symbol == NULL)
 718                         /* We cannot generate a string since we
 719                            cannot map from the Unicode number to the
 720                            character symbol.  */
 721                         illegal_string = 1;
 722                       else
 723                         {
 724                           seq = charmap_find_value (charmap, symbol,
 725                                                     strlen (symbol));
 726
 727                           if (seq == NULL)
 728                             /* Not a known name.  */
 729                             illegal_string = 1;
 730                         }
 731                     }
 732
 733                   if (seq != NULL)
 734                     ADDS (seq->bytes, seq->nbytes);
 735
 736                   continue;
 737                 }
 738             }
 739
 740           /* We now have the symbolic name in buf[startidx] to
 741              buf[bufact-1].  Now find out the value for this character
 742              in the charmap as well as in the repertoire map (in this
 743              order).  */
 744           seq = charmap_find_value (charmap, &buf[startidx],
 745                                     bufact - startidx);
 746
 747           if (seq == NULL)
 748             {
 749               /* This name is not in the charmap.  */
 750               lr_error (lr, _("symbol `%.*s' not in charmap"),
 751                         (int) (bufact - startidx), &buf[startidx]);
 752               illegal_string = 1;
 753             }
 754
 755           if (return_widestr)
 756             {
 757               /* Now the same for the multibyte representation.  */
 758               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 759                 wch = seq->ucs4;
 760               else
 761                 {
 762                   wch = repertoire_find_value (repertoire, &buf[startidx],
 763                                                bufact - startidx);
 764                   if (seq != NULL)
 765                     seq->ucs4 = wch;
 766                 }
 767
 768               if (wch == ILLEGAL_CHAR_VALUE)
 769                 {
 770                   /* This name is not in the repertoire map.  */
 771                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 772                             (int) (bufact - startidx), &buf[startidx]);
 773                   illegal_string = 1;
 774                 }
 775               else
 776                 ADDWC (wch);
 777             }
 778
 779           /* Now forget about the name we just added.  */
 780           bufact = startidx;
 781
 782           /* And copy the bytes.  */
 783           if (seq != NULL)
 784             ADDS (seq->bytes, seq->nbytes);
 785         }
 786
 787       if (ch == '\n' || ch == EOF)
 788         {
 789           lr_error (lr, _("unterminated string"));
 790           illegal_string = 1;
 791         }
 792
 793       if (illegal_string)
 794         {
 795           free (buf);
 796           if (buf2 != NULL)
 797             free (buf2);
 798           lr->token.val.str.startmb = NULL;
 799           lr->token.val.str.lenmb = 0;
 800           lr->token.val.str.startwc = NULL;
 801           lr->token.val.str.lenwc = 0;
 802
 803           return &lr->token;
 804         }
 805
 806       ADDC ('\0');
 807
 808       if (return_widestr)
 809         {
 810           ADDWC (0);
 811           lr->token.val.str.startwc = xrealloc (buf2,
 812                                                 buf2act * sizeof (uint32_t));
 813           lr->token.val.str.lenwc = buf2act;
 814         }
 815     }
 816
 817   lr->token.val.str.startmb = xrealloc (buf, bufact);
 818   lr->token.val.str.lenmb = bufact;
 819
 820   return &lr->token;
 821 }