usr.bin/localedef/scanner.c

   1 /*
   2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright 2015 John Marino <draco@marino.st>
   4  *
   5  * This source code is derived from the illumos localedef command, and
   6  * provided under BSD-style license terms by Nexenta Systems, Inc.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  */
  30
  31 /*
  32  * This file contains the "scanner", which tokenizes the input files
  33  * for localedef for processing by the higher level grammar processor.
  34  */
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <ctype.h>
  39 #include <limits.h>
  40 #include <string.h>
  41 #include <wchar.h>
  42 #include <sys/types.h>
  43 #include <assert.h>
  44 #include "localedef.h"
  45 #include "parser.h"
  46
  47 int                     com_char = '#';
  48 int                     esc_char = '\\';
  49 int                     mb_cur_min = 1;
  50 int                     mb_cur_max = 1;
  51 int                     lineno = 1;
  52 int                     warnings = 0;
  53 int                     is_stdin = 1;
  54 FILE                    *input;
  55 static int              nextline;
  56 //static FILE           *input = stdin;
  57 static const char       *filename = "<stdin>";
  58 static int              instring = 0;
  59 static int              escaped = 0;
  60
  61 /*
  62  * Token space ... grows on demand.
  63  */
  64 static char *token = NULL;
  65 static int tokidx;
  66 static int toksz = 0;
  67 static int hadtok = 0;
  68
  69 /*
  70  * Wide string space ... grows on demand.
  71  */
  72 static wchar_t *widestr = NULL;
  73 static int wideidx = 0;
  74 static int widesz = 0;
  75
  76 /*
  77  * The last keyword seen.  This is useful to trigger the special lexer rules
  78  * for "copy" and also collating symbols and elements.
  79  */
  80 int     last_kw = 0;
  81 static int      category = T_END;
  82
  83 static struct token {
  84         int id;
  85         const char *name;
  86 } keywords[] = {
  87         { T_COM_CHAR,           "comment_char" },
  88         { T_ESC_CHAR,           "escape_char" },
  89         { T_END,                "END" },
  90         { T_COPY,               "copy" },
  91         { T_MESSAGES,           "LC_MESSAGES" },
  92         { T_YESSTR,             "yesstr" },
  93         { T_YESEXPR,            "yesexpr" },
  94         { T_NOSTR,              "nostr" },
  95         { T_NOEXPR,             "noexpr" },
  96         { T_MONETARY,           "LC_MONETARY" },
  97         { T_INT_CURR_SYMBOL,    "int_curr_symbol" },
  98         { T_CURRENCY_SYMBOL,    "currency_symbol" },
  99         { T_MON_DECIMAL_POINT,  "mon_decimal_point" },
 100         { T_MON_THOUSANDS_SEP,  "mon_thousands_sep" },
 101         { T_POSITIVE_SIGN,      "positive_sign" },
 102         { T_NEGATIVE_SIGN,      "negative_sign" },
 103         { T_MON_GROUPING,       "mon_grouping" },
 104         { T_INT_FRAC_DIGITS,    "int_frac_digits" },
 105         { T_FRAC_DIGITS,        "frac_digits" },
 106         { T_P_CS_PRECEDES,      "p_cs_precedes" },
 107         { T_P_SEP_BY_SPACE,     "p_sep_by_space" },
 108         { T_N_CS_PRECEDES,      "n_cs_precedes" },
 109         { T_N_SEP_BY_SPACE,     "n_sep_by_space" },
 110         { T_P_SIGN_POSN,        "p_sign_posn" },
 111         { T_N_SIGN_POSN,        "n_sign_posn" },
 112         { T_INT_P_CS_PRECEDES,  "int_p_cs_precedes" },
 113         { T_INT_N_CS_PRECEDES,  "int_n_cs_precedes" },
 114         { T_INT_P_SEP_BY_SPACE, "int_p_sep_by_space" },
 115         { T_INT_N_SEP_BY_SPACE, "int_n_sep_by_space" },
 116         { T_INT_P_SIGN_POSN,    "int_p_sign_posn" },
 117         { T_INT_N_SIGN_POSN,    "int_n_sign_posn" },
 118         { T_COLLATE,            "LC_COLLATE" },
 119         { T_COLLATING_SYMBOL,   "collating-symbol" },
 120         { T_COLLATING_ELEMENT,  "collating-element" },
 121         { T_FROM,               "from" },
 122         { T_ORDER_START,        "order_start" },
 123         { T_ORDER_END,          "order_end" },
 124         { T_FORWARD,            "forward" },
 125         { T_BACKWARD,           "backward" },
 126         { T_POSITION,           "position" },
 127         { T_IGNORE,             "IGNORE" },
 128         { T_UNDEFINED,          "UNDEFINED" },
 129         { T_NUMERIC,            "LC_NUMERIC" },
 130         { T_DECIMAL_POINT,      "decimal_point" },
 131         { T_THOUSANDS_SEP,      "thousands_sep" },
 132         { T_GROUPING,           "grouping" },
 133         { T_TIME,               "LC_TIME" },
 134         { T_ABDAY,              "abday" },
 135         { T_DAY,                "day" },
 136         { T_ABMON,              "abmon" },
 137         { T_MON,                "mon" },
 138         { T_D_T_FMT,            "d_t_fmt" },
 139         { T_D_FMT,              "d_fmt" },
 140         { T_T_FMT,              "t_fmt" },
 141         { T_AM_PM,              "am_pm" },
 142         { T_T_FMT_AMPM,         "t_fmt_ampm" },
 143         { T_ERA,                "era" },
 144         { T_ERA_D_FMT,          "era_d_fmt" },
 145         { T_ERA_T_FMT,          "era_t_fmt" },
 146         { T_ERA_D_T_FMT,        "era_d_t_fmt" },
 147         { T_ALT_DIGITS,         "alt_digits" },
 148         { T_CTYPE,              "LC_CTYPE" },
 149         { T_ISUPPER,            "upper" },
 150         { T_ISLOWER,            "lower" },
 151         { T_ISALPHA,            "alpha" },
 152         { T_ISDIGIT,            "digit" },
 153         { T_ISPUNCT,            "punct" },
 154         { T_ISXDIGIT,           "xdigit" },
 155         { T_ISSPACE,            "space" },
 156         { T_ISPRINT,            "print" },
 157         { T_ISGRAPH,            "graph" },
 158         { T_ISBLANK,            "blank" },
 159         { T_ISCNTRL,            "cntrl" },
 160         /*
 161          * These entries are local additions, and not specified by
 162          * TOG.  Note that they are not guaranteed to be accurate for
 163          * all locales, and so applications should not depend on them.
 164          */
 165         { T_ISSPECIAL,          "special" },
 166         { T_ISENGLISH,          "english" },
 167         { T_ISPHONOGRAM,        "phonogram" },
 168         { T_ISIDEOGRAM,         "ideogram" },
 169         { T_ISNUMBER,           "number" },
 170         /*
 171          * We have to support this in the grammar, but it would be a
 172          * syntax error to define a character as one of these without
 173          * also defining it as an alpha or digit.  We ignore it in our
 174          * parsing.
 175          */
 176         { T_ISALNUM,            "alnum" },
 177         { T_TOUPPER,            "toupper" },
 178         { T_TOLOWER,            "tolower" },
 179
 180         /*
 181          * These are keywords used in the charmap file.  Note that
 182          * Solaris orginally used angle brackets to wrap some of them,
 183          * but we removed that to simplify our parser.  The first of these
 184          * items are "global items."
 185          */
 186         { T_CHARMAP,            "CHARMAP" },
 187         { T_WIDTH,              "WIDTH" },
 188
 189         { -1, NULL },
 190 };
 191
 192 /*
 193  * These special words are only used in a charmap file, enclosed in <>.
 194  */
 195 static struct token symwords[] = {
 196         { T_COM_CHAR,           "comment_char" },
 197         { T_ESC_CHAR,           "escape_char" },
 198         { T_CODE_SET,           "code_set_name" },
 199         { T_MB_CUR_MAX,         "mb_cur_max" },
 200         { T_MB_CUR_MIN,         "mb_cur_min" },
 201         { -1, NULL },
 202 };
 203
 204 static int categories[] = {
 205         T_CHARMAP,
 206         T_CTYPE,
 207         T_COLLATE,
 208         T_MESSAGES,
 209         T_MONETARY,
 210         T_NUMERIC,
 211         T_TIME,
 212         T_WIDTH,
 213         0
 214 };
 215
 216 void
 217 reset_scanner(const char *fname)
 218 {
 219         if (fname == NULL) {
 220                 filename = "<stdin>";
 221                 is_stdin = 1;
 222         } else {
 223                 if (!is_stdin)
 224                         (void) fclose(input);
 225                 if ((input = fopen(fname, "r")) == NULL) {
 226                         perror("fopen");
 227                         exit(4);
 228                 } else {
 229                         is_stdin = 0;
 230                 }
 231                 filename = fname;
 232         }
 233         com_char = '#';
 234         esc_char = '\\';
 235         instring = 0;
 236         escaped = 0;
 237         lineno = 1;
 238         nextline = 1;
 239         tokidx = 0;
 240         wideidx = 0;
 241 }
 242
 243 #define hex(x)  \
 244         (isdigit(x) ? (x - '0') : ((islower(x) ? (x - 'a') : (x - 'A')) + 10))
 245 #define isodigit(x)     ((x >= '0') && (x <= '7'))
 246
 247 static int
 248 scanc(void)
 249 {
 250         int     c;
 251
 252         if (is_stdin)
 253                 c = getc(stdin);
 254         else
 255                 c = getc(input);
 256         lineno = nextline;
 257         if (c == '\n') {
 258                 nextline++;
 259         }
 260         return (c);
 261 }
 262
 263 static void
 264 unscanc(int c)
 265 {
 266         if (c == '\n') {
 267                 nextline--;
 268         }
 269         if (ungetc(c, is_stdin ? stdin : input) < 0) {
 270                 yyerror("ungetc failed");
 271         }
 272 }
 273
 274 static int
 275 scan_hex_byte(void)
 276 {
 277         int     c1, c2;
 278         int     v;
 279
 280         c1 = scanc();
 281         if (!isxdigit(c1)) {
 282                 yyerror("malformed hex digit");
 283                 return (0);
 284         }
 285         c2 = scanc();
 286         if (!isxdigit(c2)) {
 287                 yyerror("malformed hex digit");
 288                 return (0);
 289         }
 290         v = ((hex(c1) << 4) | hex(c2));
 291         return (v);
 292 }
 293
 294 static int
 295 scan_dec_byte(void)
 296 {
 297         int     c1, c2, c3;
 298         int     b;
 299
 300         c1 = scanc();
 301         if (!isdigit(c1)) {
 302                 yyerror("malformed decimal digit");
 303                 return (0);
 304         }
 305         b = c1 - '0';
 306         c2 = scanc();
 307         if (!isdigit(c2)) {
 308                 yyerror("malformed decimal digit");
 309                 return (0);
 310         }
 311         b *= 10;
 312         b += (c2 - '0');
 313         c3 = scanc();
 314         if (!isdigit(c3)) {
 315                 unscanc(c3);
 316         } else {
 317                 b *= 10;
 318                 b += (c3 - '0');
 319         }
 320         return (b);
 321 }
 322
 323 static int
 324 scan_oct_byte(void)
 325 {
 326         int c1, c2, c3;
 327         int     b;
 328
 329         b = 0;
 330
 331         c1 = scanc();
 332         if (!isodigit(c1)) {
 333                 yyerror("malformed octal digit");
 334                 return (0);
 335         }
 336         b = c1 - '0';
 337         c2 = scanc();
 338         if (!isodigit(c2)) {
 339                 yyerror("malformed octal digit");
 340                 return (0);
 341         }
 342         b *= 8;
 343         b += (c2 - '0');
 344         c3 = scanc();
 345         if (!isodigit(c3)) {
 346                 unscanc(c3);
 347         } else {
 348                 b *= 8;
 349                 b += (c3 - '0');
 350         }
 351         return (b);
 352 }
 353
 354 void
 355 add_tok(int c)
 356 {
 357         if ((tokidx + 1) >= toksz) {
 358                 toksz += 64;
 359                 if ((token = realloc(token, toksz)) == NULL) {
 360                         yyerror("out of memory");
 361                         tokidx = 0;
 362                         toksz = 0;
 363                         return;
 364                 }
 365         }
 366
 367         token[tokidx++] = (char)c;
 368         token[tokidx] = 0;
 369 }
 370 void
 371 add_wcs(wchar_t c)
 372 {
 373         if ((wideidx + 1) >= widesz) {
 374                 widesz += 64;
 375                 widestr = realloc(widestr, (widesz * sizeof (wchar_t)));
 376                 if (widestr == NULL) {
 377                         yyerror("out of memory");
 378                         wideidx = 0;
 379                         widesz = 0;
 380                         return;
 381                 }
 382         }
 383
 384         widestr[wideidx++] = c;
 385         widestr[wideidx] = 0;
 386 }
 387
 388 wchar_t *
 389 get_wcs(void)
 390 {
 391         wchar_t *ws = widestr;
 392         wideidx = 0;
 393         widestr = NULL;
 394         widesz = 0;
 395         if (ws == NULL) {
 396                 if ((ws = wcsdup(L"")) == NULL) {
 397                         yyerror("out of memory");
 398                 }
 399         }
 400         return (ws);
 401 }
 402
 403 static int
 404 get_byte(void)
 405 {
 406         int     c;
 407
 408         if ((c = scanc()) != esc_char) {
 409                 unscanc(c);
 410                 return (EOF);
 411         }
 412         c = scanc();
 413
 414         switch (c) {
 415         case 'd':
 416         case 'D':
 417                 return (scan_dec_byte());
 418         case 'x':
 419         case 'X':
 420                 return (scan_hex_byte());
 421         case '0':
 422         case '1':
 423         case '2':
 424         case '3':
 425         case '4':
 426         case '5':
 427         case '6':
 428         case '7':
 429                 /* put the character back so we can get it */
 430                 unscanc(c);
 431                 return (scan_oct_byte());
 432         default:
 433                 unscanc(c);
 434                 unscanc(esc_char);
 435                 return (EOF);
 436         }
 437 }
 438
 439 int
 440 get_escaped(int c)
 441 {
 442         switch (c) {
 443         case 'n':
 444                 return ('\n');
 445         case 'r':
 446                 return ('\r');
 447         case 't':
 448                 return ('\t');
 449         case 'f':
 450                 return ('\f');
 451         case 'v':
 452                 return ('\v');
 453         case 'b':
 454                 return ('\b');
 455         case 'a':
 456                 return ('\a');
 457         default:
 458                 return (c);
 459         }
 460 }
 461
 462 int
 463 get_wide(void)
 464 {
 465         static char mbs[MB_LEN_MAX + 1] = "";
 466         static int mbi = 0;
 467         int c;
 468         wchar_t wc;
 469
 470         if (mb_cur_max >= (int)sizeof (mbs)) {
 471                 yyerror("max multibyte character size too big");
 472                 mbi = 0;
 473                 return (T_NULL);
 474         }
 475         for (;;) {
 476                 if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) {
 477                         /*
 478                          * end of the byte sequence reached, but no
 479                          * valid wide decoding.  fatal error.
 480                          */
 481                         mbi = 0;
 482                         yyerror("not a valid character encoding");
 483                         return (T_NULL);
 484                 }
 485                 mbs[mbi++] = c;
 486                 mbs[mbi] = 0;
 487
 488                 /* does it decode? */
 489                 if (to_wide(&wc, mbs) >= 0) {
 490                         break;
 491                 }
 492         }
 493
 494         mbi = 0;
 495         if ((category != T_CHARMAP) && (category != T_WIDTH)) {
 496                 if (check_charmap(wc) < 0) {
 497                         yyerror("no symbolic name for character");
 498                         return (T_NULL);
 499                 }
 500         }
 501
 502         yylval.wc = wc;
 503         return (T_CHAR);
 504 }
 505
 506 int
 507 get_symbol(void)
 508 {
 509         int     c;
 510
 511         while ((c = scanc()) != EOF) {
 512                 if (escaped) {
 513                         escaped = 0;
 514                         if (c == '\n')
 515                                 continue;
 516                         add_tok(get_escaped(c));
 517                         continue;
 518                 }
 519                 if (c == esc_char) {
 520                         escaped = 1;
 521                         continue;
 522                 }
 523                 if (c == '\n') {        /* well that's strange! */
 524                         yyerror("unterminated symbolic name");
 525                         continue;
 526                 }
 527                 if (c == '>') {         /* end of symbol */
 528
 529                         /*
 530                          * This restarts the token from the beginning
 531                          * the next time we scan a character.  (This
 532                          * token is complete.)
 533                          */
 534
 535                         if (token == NULL) {
 536                                 yyerror("missing symbolic name");
 537                                 return (T_NULL);
 538                         }
 539                         tokidx = 0;
 540
 541                         /*
 542                          * A few symbols are handled as keywords outside
 543                          * of the normal categories.
 544                          */
 545                         if (category == T_END) {
 546                                 int i;
 547                                 for (i = 0; symwords[i].name != 0; i++) {
 548                                         if (strcmp(token, symwords[i].name) ==
 549                                             0) {
 550                                                 last_kw = symwords[i].id;
 551                                                 return (last_kw);
 552                                         }
 553                                 }
 554                         }
 555                         /*
 556                          * Contextual rule: Only literal characters are
 557                          * permitted in CHARMAP.  Anywhere else the symbolic
 558                          * forms are fine.
 559                          */
 560                         if ((category != T_CHARMAP) &&
 561                             (lookup_charmap(token, &yylval.wc)) != -1) {
 562                                 return (T_CHAR);
 563                         }
 564                         if ((yylval.collsym = lookup_collsym(token)) != NULL) {
 565                                 return (T_COLLSYM);
 566                         }
 567                         if ((yylval.collelem = lookup_collelem(token)) !=
 568                             NULL) {
 569                                 return (T_COLLELEM);
 570                         }
 571                         /* its an undefined symbol */
 572                         yylval.token = strdup(token);
 573                         token = NULL;
 574                         toksz = 0;
 575                         tokidx = 0;
 576                         return (T_SYMBOL);
 577                 }
 578                 add_tok(c);
 579         }
 580
 581         yyerror("unterminated symbolic name");
 582         return (EOF);
 583 }
 584
 585 int
 586 get_category(void)
 587 {
 588         return (category);
 589 }
 590
 591 static int
 592 consume_token(void)
 593 {
 594         int     len = tokidx;
 595         int     i;
 596
 597         tokidx = 0;
 598         if (token == NULL)
 599                 return (T_NULL);
 600
 601         /*
 602          * this one is special, because we don't want it to alter the
 603          * last_kw field.
 604          */
 605         if (strcmp(token, "...") == 0) {
 606                 return (T_ELLIPSIS);
 607         }
 608
 609         /* search for reserved words first */
 610         for (i = 0; keywords[i].name; i++) {
 611                 int j;
 612                 if (strcmp(keywords[i].name, token) != 0) {
 613                         continue;
 614                 }
 615
 616                 last_kw = keywords[i].id;
 617
 618                 /* clear the top level category if we're done with it */
 619                 if (last_kw == T_END) {
 620                         category = T_END;
 621                 }
 622
 623                 /* set the top level category if we're changing */
 624                 for (j = 0; categories[j]; j++) {
 625                         if (categories[j] != last_kw)
 626                                 continue;
 627                         category = last_kw;
 628                 }
 629
 630                 return (keywords[i].id);
 631         }
 632
 633         /* maybe its a numeric constant? */
 634         if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
 635                 char *eptr;
 636                 yylval.num = strtol(token, &eptr, 10);
 637                 if (*eptr != 0)
 638                         yyerror("malformed number");
 639                 return (T_NUMBER);
 640         }
 641
 642         /*
 643          * A single lone character is treated as a character literal.
 644          * To avoid duplication of effort, we stick in the charmap.
 645          */
 646         if (len == 1) {
 647                 yylval.wc = token[0];
 648                 return (T_CHAR);
 649         }
 650
 651         /* anything else is treated as a symbolic name */
 652         yylval.token = strdup(token);
 653         token = NULL;
 654         toksz = 0;
 655         tokidx = 0;
 656         return (T_NAME);
 657 }
 658
 659 void
 660 scan_to_eol(void)
 661 {
 662         int     c;
 663         while ((c = scanc()) != '\n') {
 664                 if (c == EOF) {
 665                         /* end of file without newline! */
 666                         errf("missing newline");
 667                         return;
 668                 }
 669         }
 670         assert(c == '\n');
 671 }
 672
 673 int
 674 yylex(void)
 675 {
 676         int             c;
 677
 678         while ((c = scanc()) != EOF) {
 679
 680                 /* special handling for quoted string */
 681                 if (instring) {
 682                         if (escaped) {
 683                                 escaped = 0;
 684
 685                                 /* if newline, just eat and forget it */
 686                                 if (c == '\n')
 687                                         continue;
 688
 689                                 if (strchr("xXd01234567", c)) {
 690                                         unscanc(c);
 691                                         unscanc(esc_char);
 692                                         return (get_wide());
 693                                 }
 694                                 yylval.wc = get_escaped(c);
 695                                 return (T_CHAR);
 696                         }
 697                         if (c == esc_char) {
 698                                 escaped = 1;
 699                                 continue;
 700                         }
 701                         switch (c) {
 702                         case '<':
 703                                 return (get_symbol());
 704                         case '>':
 705                                 /* oops! should generate syntax error  */
 706                                 return (T_GT);
 707                         case '"':
 708                                 instring = 0;
 709                                 return (T_QUOTE);
 710                         default:
 711                                 yylval.wc = c;
 712                                 return (T_CHAR);
 713                         }
 714                 }
 715
 716                 /* escaped characters first */
 717                 if (escaped) {
 718                         escaped = 0;
 719                         if (c == '\n') {
 720                                 /* eat the newline */
 721                                 continue;
 722                         }
 723                         hadtok = 1;
 724                         if (tokidx) {
 725                                 /* an escape mid-token is nonsense */
 726                                 return (T_NULL);
 727                         }
 728
 729                         /* numeric escapes are treated as wide characters */
 730                         if (strchr("xXd01234567", c)) {
 731                                 unscanc(c);
 732                                 unscanc(esc_char);
 733                                 return (get_wide());
 734                         }
 735
 736                         add_tok(get_escaped(c));
 737                         continue;
 738                 }
 739
 740                 /* if it is the escape charter itself note it */
 741                 if (c == esc_char) {
 742                         escaped = 1;
 743                         continue;
 744                 }
 745
 746                 /* remove from the comment char to end of line */
 747                 if (c == com_char) {
 748                         while (c != '\n') {
 749                                 if ((c = scanc()) == EOF) {
 750                                         /* end of file without newline! */
 751                                         return (EOF);
 752                                 }
 753                         }
 754                         assert(c == '\n');
 755                         if (!hadtok) {
 756                                 /*
 757                                  * If there were no tokens on this line,
 758                                  * then just pretend it didn't exist at all.
 759                                  */
 760                                 continue;
 761                         }
 762                         hadtok = 0;
 763                         return (T_NL);
 764                 }
 765
 766                 if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
 767                         /*
 768                          * These are all token delimiters.  If there
 769                          * is a token already in progress, we need to
 770                          * process it.
 771                          */
 772                         unscanc(c);
 773                         return (consume_token());
 774                 }
 775
 776                 switch (c) {
 777                 case '\n':
 778                         if (!hadtok) {
 779                                 /*
 780                                  * If the line was completely devoid of tokens,
 781                                  * then just ignore it.
 782                                  */
 783                                 continue;
 784                         }
 785                         /* we're starting a new line, reset the token state */
 786                         hadtok = 0;
 787                         return (T_NL);
 788                 case ',':
 789                         hadtok = 1;
 790                         return (T_COMMA);
 791                 case ';':
 792                         hadtok = 1;
 793                         return (T_SEMI);
 794                 case '(':
 795                         hadtok = 1;
 796                         return (T_LPAREN);
 797                 case ')':
 798                         hadtok = 1;
 799                         return (T_RPAREN);
 800                 case '>':
 801                         hadtok = 1;
 802                         return (T_GT);
 803                 case '<':
 804                         /* symbol start! */
 805                         hadtok = 1;
 806                         return (get_symbol());
 807                 case ' ':
 808                 case '\t':
 809                         /* whitespace, just ignore it */
 810                         continue;
 811                 case '"':
 812                         hadtok = 1;
 813                         instring = 1;
 814                         return (T_QUOTE);
 815                 default:
 816                         hadtok = 1;
 817                         add_tok(c);
 818                         continue;
 819                 }
 820         }
 821         return (EOF);
 822 }
 823
 824 void
 825 yyerror(const char *msg)
 826 {
 827         (void) fprintf(stderr, "%s: %d: error: %s\n",
 828             filename, lineno, msg);
 829         exit(4);
 830 }
 831
 832 __printflike(1, 2) void
 833 errf(const char *fmt, ...)
 834 {
 835         char    *msg;
 836
 837         va_list va;
 838         va_start(va, fmt);
 839         (void) vasprintf(&msg, fmt, va);
 840         va_end(va);
 841
 842         (void) fprintf(stderr, "%s: %d: error: %s\n",
 843             filename, lineno, msg);
 844         free(msg);
 845         exit(4);
 846 }
 847
 848 __printflike(1, 2) void
 849 warn(const char *fmt, ...)
 850 {
 851         char    *msg;
 852
 853         va_list va;
 854         va_start(va, fmt);
 855         (void) vasprintf(&msg, fmt, va);
 856         va_end(va);
 857
 858         (void) fprintf(stderr, "%s: %d: warning: %s\n",
 859             filename, lineno, msg);
 860         free(msg);
 861         warnings++;
 862         if (!warnok)
 863                 exit(4);
 864 }