usr.bin/localedef/scanner.c

   1 /*
   2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright 2015 John Marino <draco@marino.st>
   4  *
   5  * This source code is derived from the illumos localedef command, and
   6  * provided under BSD-style license terms by Nexenta Systems, Inc.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  *
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  */
  30
  31 /*
  32  * This file contains the "scanner", which tokenizes the input files
  33  * for localedef for processing by the higher level grammar processor.
  34  */
  35 #include <sys/cdefs.h>
  36 __FBSDID("$FreeBSD$");
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <ctype.h>
  41 #include <limits.h>
  42 #include <string.h>
  43 #include <wchar.h>
  44 #include <sys/types.h>
  45 #include <assert.h>
  46 #include "localedef.h"
  47 #include "parser.h"
  48
  49 int                     com_char = '#';
  50 int                     esc_char = '\\';
  51 int                     mb_cur_min = 1;
  52 int                     mb_cur_max = 1;
  53 int                     lineno = 1;
  54 int                     warnings = 0;
  55 int                     is_stdin = 1;
  56 FILE                    *input;
  57 static int              nextline;
  58 //static FILE           *input = stdin;
  59 static const char       *filename = "<stdin>";
  60 static int              instring = 0;
  61 static int              escaped = 0;
  62
  63 /*
  64  * Token space ... grows on demand.
  65  */
  66 static char *token = NULL;
  67 static int tokidx;
  68 static int toksz = 0;
  69 static int hadtok = 0;
  70
  71 /*
  72  * Wide string space ... grows on demand.
  73  */
  74 static wchar_t *widestr = NULL;
  75 static int wideidx = 0;
  76 static int widesz = 0;
  77
  78 /*
  79  * The last keyword seen.  This is useful to trigger the special lexer rules
  80  * for "copy" and also collating symbols and elements.
  81  */
  82 int     last_kw = 0;
  83 static int      category = T_END;
  84
  85 static struct token {
  86         int id;
  87         const char *name;
  88 } keywords[] = {
  89         { T_COM_CHAR,           "comment_char" },
  90         { T_ESC_CHAR,           "escape_char" },
  91         { T_END,                "END" },
  92         { T_COPY,               "copy" },
  93         { T_MESSAGES,           "LC_MESSAGES" },
  94         { T_YESSTR,             "yesstr" },
  95         { T_YESEXPR,            "yesexpr" },
  96         { T_NOSTR,              "nostr" },
  97         { T_NOEXPR,             "noexpr" },
  98         { T_MONETARY,           "LC_MONETARY" },
  99         { T_INT_CURR_SYMBOL,    "int_curr_symbol" },
 100         { T_CURRENCY_SYMBOL,    "currency_symbol" },
 101         { T_MON_DECIMAL_POINT,  "mon_decimal_point" },
 102         { T_MON_THOUSANDS_SEP,  "mon_thousands_sep" },
 103         { T_POSITIVE_SIGN,      "positive_sign" },
 104         { T_NEGATIVE_SIGN,      "negative_sign" },
 105         { T_MON_GROUPING,       "mon_grouping" },
 106         { T_INT_FRAC_DIGITS,    "int_frac_digits" },
 107         { T_FRAC_DIGITS,        "frac_digits" },
 108         { T_P_CS_PRECEDES,      "p_cs_precedes" },
 109         { T_P_SEP_BY_SPACE,     "p_sep_by_space" },
 110         { T_N_CS_PRECEDES,      "n_cs_precedes" },
 111         { T_N_SEP_BY_SPACE,     "n_sep_by_space" },
 112         { T_P_SIGN_POSN,        "p_sign_posn" },
 113         { T_N_SIGN_POSN,        "n_sign_posn" },
 114         { T_INT_P_CS_PRECEDES,  "int_p_cs_precedes" },
 115         { T_INT_N_CS_PRECEDES,  "int_n_cs_precedes" },
 116         { T_INT_P_SEP_BY_SPACE, "int_p_sep_by_space" },
 117         { T_INT_N_SEP_BY_SPACE, "int_n_sep_by_space" },
 118         { T_INT_P_SIGN_POSN,    "int_p_sign_posn" },
 119         { T_INT_N_SIGN_POSN,    "int_n_sign_posn" },
 120         { T_COLLATE,            "LC_COLLATE" },
 121         { T_COLLATING_SYMBOL,   "collating-symbol" },
 122         { T_COLLATING_ELEMENT,  "collating-element" },
 123         { T_FROM,               "from" },
 124         { T_ORDER_START,        "order_start" },
 125         { T_ORDER_END,          "order_end" },
 126         { T_FORWARD,            "forward" },
 127         { T_BACKWARD,           "backward" },
 128         { T_POSITION,           "position" },
 129         { T_IGNORE,             "IGNORE" },
 130         { T_UNDEFINED,          "UNDEFINED" },
 131         { T_NUMERIC,            "LC_NUMERIC" },
 132         { T_DECIMAL_POINT,      "decimal_point" },
 133         { T_THOUSANDS_SEP,      "thousands_sep" },
 134         { T_GROUPING,           "grouping" },
 135         { T_TIME,               "LC_TIME" },
 136         { T_ABDAY,              "abday" },
 137         { T_DAY,                "day" },
 138         { T_ABMON,              "abmon" },
 139         { T_MON,                "mon" },
 140         { T_D_T_FMT,            "d_t_fmt" },
 141         { T_D_FMT,              "d_fmt" },
 142         { T_T_FMT,              "t_fmt" },
 143         { T_AM_PM,              "am_pm" },
 144         { T_T_FMT_AMPM,         "t_fmt_ampm" },
 145         { T_ERA,                "era" },
 146         { T_ERA_D_FMT,          "era_d_fmt" },
 147         { T_ERA_T_FMT,          "era_t_fmt" },
 148         { T_ERA_D_T_FMT,        "era_d_t_fmt" },
 149         { T_ALT_DIGITS,         "alt_digits" },
 150         { T_CTYPE,              "LC_CTYPE" },
 151         { T_ISUPPER,            "upper" },
 152         { T_ISLOWER,            "lower" },
 153         { T_ISALPHA,            "alpha" },
 154         { T_ISDIGIT,            "digit" },
 155         { T_ISPUNCT,            "punct" },
 156         { T_ISXDIGIT,           "xdigit" },
 157         { T_ISSPACE,            "space" },
 158         { T_ISPRINT,            "print" },
 159         { T_ISGRAPH,            "graph" },
 160         { T_ISBLANK,            "blank" },
 161         { T_ISCNTRL,            "cntrl" },
 162         /*
 163          * These entries are local additions, and not specified by
 164          * TOG.  Note that they are not guaranteed to be accurate for
 165          * all locales, and so applications should not depend on them.
 166          */
 167         { T_ISSPECIAL,          "special" },
 168         { T_ISENGLISH,          "english" },
 169         { T_ISPHONOGRAM,        "phonogram" },
 170         { T_ISIDEOGRAM,         "ideogram" },
 171         { T_ISNUMBER,           "number" },
 172         /*
 173          * We have to support this in the grammar, but it would be a
 174          * syntax error to define a character as one of these without
 175          * also defining it as an alpha or digit.  We ignore it in our
 176          * parsing.
 177          */
 178         { T_ISALNUM,            "alnum" },
 179         { T_TOUPPER,            "toupper" },
 180         { T_TOLOWER,            "tolower" },
 181
 182         /*
 183          * These are keywords used in the charmap file.  Note that
 184          * Solaris originally used angle brackets to wrap some of them,
 185          * but we removed that to simplify our parser.  The first of these
 186          * items are "global items."
 187          */
 188         { T_CHARMAP,            "CHARMAP" },
 189         { T_WIDTH,              "WIDTH" },
 190
 191         { -1, NULL },
 192 };
 193
 194 /*
 195  * These special words are only used in a charmap file, enclosed in <>.
 196  */
 197 static struct token symwords[] = {
 198         { T_COM_CHAR,           "comment_char" },
 199         { T_ESC_CHAR,           "escape_char" },
 200         { T_CODE_SET,           "code_set_name" },
 201         { T_MB_CUR_MAX,         "mb_cur_max" },
 202         { T_MB_CUR_MIN,         "mb_cur_min" },
 203         { -1, NULL },
 204 };
 205
 206 static int categories[] = {
 207         T_CHARMAP,
 208         T_CTYPE,
 209         T_COLLATE,
 210         T_MESSAGES,
 211         T_MONETARY,
 212         T_NUMERIC,
 213         T_TIME,
 214         T_WIDTH,
 215         0
 216 };
 217
 218 void
 219 reset_scanner(const char *fname)
 220 {
 221         if (fname == NULL) {
 222                 filename = "<stdin>";
 223                 is_stdin = 1;
 224         } else {
 225                 if (!is_stdin)
 226                         (void) fclose(input);
 227                 if ((input = fopen(fname, "r")) == NULL) {
 228                         perror("fopen");
 229                         exit(4);
 230                 } else {
 231                         is_stdin = 0;
 232                 }
 233                 filename = fname;
 234         }
 235         com_char = '#';
 236         esc_char = '\\';
 237         instring = 0;
 238         escaped = 0;
 239         lineno = 1;
 240         nextline = 1;
 241         tokidx = 0;
 242         wideidx = 0;
 243 }
 244
 245 #define hex(x)  \
 246         (isdigit(x) ? (x - '0') : ((islower(x) ? (x - 'a') : (x - 'A')) + 10))
 247 #define isodigit(x)     ((x >= '0') && (x <= '7'))
 248
 249 static int
 250 scanc(void)
 251 {
 252         int     c;
 253
 254         if (is_stdin)
 255                 c = getc(stdin);
 256         else
 257                 c = getc(input);
 258         lineno = nextline;
 259         if (c == '\n') {
 260                 nextline++;
 261         }
 262         return (c);
 263 }
 264
 265 static void
 266 unscanc(int c)
 267 {
 268         if (c == '\n') {
 269                 nextline--;
 270         }
 271         if (ungetc(c, is_stdin ? stdin : input) < 0) {
 272                 yyerror("ungetc failed");
 273         }
 274 }
 275
 276 static int
 277 scan_hex_byte(void)
 278 {
 279         int     c1, c2;
 280         int     v;
 281
 282         c1 = scanc();
 283         if (!isxdigit(c1)) {
 284                 yyerror("malformed hex digit");
 285                 return (0);
 286         }
 287         c2 = scanc();
 288         if (!isxdigit(c2)) {
 289                 yyerror("malformed hex digit");
 290                 return (0);
 291         }
 292         v = ((hex(c1) << 4) | hex(c2));
 293         return (v);
 294 }
 295
 296 static int
 297 scan_dec_byte(void)
 298 {
 299         int     c1, c2, c3;
 300         int     b;
 301
 302         c1 = scanc();
 303         if (!isdigit(c1)) {
 304                 yyerror("malformed decimal digit");
 305                 return (0);
 306         }
 307         b = c1 - '0';
 308         c2 = scanc();
 309         if (!isdigit(c2)) {
 310                 yyerror("malformed decimal digit");
 311                 return (0);
 312         }
 313         b *= 10;
 314         b += (c2 - '0');
 315         c3 = scanc();
 316         if (!isdigit(c3)) {
 317                 unscanc(c3);
 318         } else {
 319                 b *= 10;
 320                 b += (c3 - '0');
 321         }
 322         return (b);
 323 }
 324
 325 static int
 326 scan_oct_byte(void)
 327 {
 328         int c1, c2, c3;
 329         int     b;
 330
 331         b = 0;
 332
 333         c1 = scanc();
 334         if (!isodigit(c1)) {
 335                 yyerror("malformed octal digit");
 336                 return (0);
 337         }
 338         b = c1 - '0';
 339         c2 = scanc();
 340         if (!isodigit(c2)) {
 341                 yyerror("malformed octal digit");
 342                 return (0);
 343         }
 344         b *= 8;
 345         b += (c2 - '0');
 346         c3 = scanc();
 347         if (!isodigit(c3)) {
 348                 unscanc(c3);
 349         } else {
 350                 b *= 8;
 351                 b += (c3 - '0');
 352         }
 353         return (b);
 354 }
 355
 356 void
 357 add_tok(int c)
 358 {
 359         if ((tokidx + 1) >= toksz) {
 360                 toksz += 64;
 361                 if ((token = realloc(token, toksz)) == NULL) {
 362                         yyerror("out of memory");
 363                         tokidx = 0;
 364                         toksz = 0;
 365                         return;
 366                 }
 367         }
 368
 369         token[tokidx++] = (char)c;
 370         token[tokidx] = 0;
 371 }
 372 void
 373 add_wcs(wchar_t c)
 374 {
 375         if ((wideidx + 1) >= widesz) {
 376                 widesz += 64;
 377                 widestr = realloc(widestr, (widesz * sizeof (wchar_t)));
 378                 if (widestr == NULL) {
 379                         yyerror("out of memory");
 380                         wideidx = 0;
 381                         widesz = 0;
 382                         return;
 383                 }
 384         }
 385
 386         widestr[wideidx++] = c;
 387         widestr[wideidx] = 0;
 388 }
 389
 390 wchar_t *
 391 get_wcs(void)
 392 {
 393         wchar_t *ws = widestr;
 394         wideidx = 0;
 395         widestr = NULL;
 396         widesz = 0;
 397         if (ws == NULL) {
 398                 if ((ws = wcsdup(L"")) == NULL) {
 399                         yyerror("out of memory");
 400                 }
 401         }
 402         return (ws);
 403 }
 404
 405 static int
 406 get_byte(void)
 407 {
 408         int     c;
 409
 410         if ((c = scanc()) != esc_char) {
 411                 unscanc(c);
 412                 return (EOF);
 413         }
 414         c = scanc();
 415
 416         switch (c) {
 417         case 'd':
 418         case 'D':
 419                 return (scan_dec_byte());
 420         case 'x':
 421         case 'X':
 422                 return (scan_hex_byte());
 423         case '0':
 424         case '1':
 425         case '2':
 426         case '3':
 427         case '4':
 428         case '5':
 429         case '6':
 430         case '7':
 431                 /* put the character back so we can get it */
 432                 unscanc(c);
 433                 return (scan_oct_byte());
 434         default:
 435                 unscanc(c);
 436                 unscanc(esc_char);
 437                 return (EOF);
 438         }
 439 }
 440
 441 int
 442 get_escaped(int c)
 443 {
 444         switch (c) {
 445         case 'n':
 446                 return ('\n');
 447         case 'r':
 448                 return ('\r');
 449         case 't':
 450                 return ('\t');
 451         case 'f':
 452                 return ('\f');
 453         case 'v':
 454                 return ('\v');
 455         case 'b':
 456                 return ('\b');
 457         case 'a':
 458                 return ('\a');
 459         default:
 460                 return (c);
 461         }
 462 }
 463
 464 int
 465 get_wide(void)
 466 {
 467         static char mbs[MB_LEN_MAX + 1] = "";
 468         static int mbi = 0;
 469         int c;
 470         wchar_t wc;
 471
 472         if (mb_cur_max >= (int)sizeof (mbs)) {
 473                 yyerror("max multibyte character size too big");
 474                 mbi = 0;
 475                 return (T_NULL);
 476         }
 477         for (;;) {
 478                 if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) {
 479                         /*
 480                          * end of the byte sequence reached, but no
 481                          * valid wide decoding.  fatal error.
 482                          */
 483                         mbi = 0;
 484                         yyerror("not a valid character encoding");
 485                         return (T_NULL);
 486                 }
 487                 mbs[mbi++] = c;
 488                 mbs[mbi] = 0;
 489
 490                 /* does it decode? */
 491                 if (to_wide(&wc, mbs) >= 0) {
 492                         break;
 493                 }
 494         }
 495
 496         mbi = 0;
 497         if ((category != T_CHARMAP) && (category != T_WIDTH)) {
 498                 if (check_charmap(wc) < 0) {
 499                         yyerror("no symbolic name for character");
 500                         return (T_NULL);
 501                 }
 502         }
 503
 504         yylval.wc = wc;
 505         return (T_CHAR);
 506 }
 507
 508 int
 509 get_symbol(void)
 510 {
 511         int     c;
 512
 513         while ((c = scanc()) != EOF) {
 514                 if (escaped) {
 515                         escaped = 0;
 516                         if (c == '\n')
 517                                 continue;
 518                         add_tok(get_escaped(c));
 519                         continue;
 520                 }
 521                 if (c == esc_char) {
 522                         escaped = 1;
 523                         continue;
 524                 }
 525                 if (c == '\n') {        /* well that's strange! */
 526                         yyerror("unterminated symbolic name");
 527                         continue;
 528                 }
 529                 if (c == '>') {         /* end of symbol */
 530
 531                         /*
 532                          * This restarts the token from the beginning
 533                          * the next time we scan a character.  (This
 534                          * token is complete.)
 535                          */
 536
 537                         if (token == NULL) {
 538                                 yyerror("missing symbolic name");
 539                                 return (T_NULL);
 540                         }
 541                         tokidx = 0;
 542
 543                         /*
 544                          * A few symbols are handled as keywords outside
 545                          * of the normal categories.
 546                          */
 547                         if (category == T_END) {
 548                                 int i;
 549                                 for (i = 0; symwords[i].name != 0; i++) {
 550                                         if (strcmp(token, symwords[i].name) ==
 551                                             0) {
 552                                                 last_kw = symwords[i].id;
 553                                                 return (last_kw);
 554                                         }
 555                                 }
 556                         }
 557                         /*
 558                          * Contextual rule: Only literal characters are
 559                          * permitted in CHARMAP.  Anywhere else the symbolic
 560                          * forms are fine.
 561                          */
 562                         if ((category != T_CHARMAP) &&
 563                             (lookup_charmap(token, &yylval.wc)) != -1) {
 564                                 return (T_CHAR);
 565                         }
 566                         if ((yylval.collsym = lookup_collsym(token)) != NULL) {
 567                                 return (T_COLLSYM);
 568                         }
 569                         if ((yylval.collelem = lookup_collelem(token)) !=
 570                             NULL) {
 571                                 return (T_COLLELEM);
 572                         }
 573                         /* its an undefined symbol */
 574                         yylval.token = strdup(token);
 575                         token = NULL;
 576                         toksz = 0;
 577                         tokidx = 0;
 578                         return (T_SYMBOL);
 579                 }
 580                 add_tok(c);
 581         }
 582
 583         yyerror("unterminated symbolic name");
 584         return (EOF);
 585 }
 586
 587 int
 588 get_category(void)
 589 {
 590         return (category);
 591 }
 592
 593 static int
 594 consume_token(void)
 595 {
 596         int     len = tokidx;
 597         int     i;
 598
 599         tokidx = 0;
 600         if (token == NULL)
 601                 return (T_NULL);
 602
 603         /*
 604          * this one is special, because we don't want it to alter the
 605          * last_kw field.
 606          */
 607         if (strcmp(token, "...") == 0) {
 608                 return (T_ELLIPSIS);
 609         }
 610
 611         /* search for reserved words first */
 612         for (i = 0; keywords[i].name; i++) {
 613                 int j;
 614                 if (strcmp(keywords[i].name, token) != 0) {
 615                         continue;
 616                 }
 617
 618                 last_kw = keywords[i].id;
 619
 620                 /* clear the top level category if we're done with it */
 621                 if (last_kw == T_END) {
 622                         category = T_END;
 623                 }
 624
 625                 /* set the top level category if we're changing */
 626                 for (j = 0; categories[j]; j++) {
 627                         if (categories[j] != last_kw)
 628                                 continue;
 629                         category = last_kw;
 630                 }
 631
 632                 return (keywords[i].id);
 633         }
 634
 635         /* maybe its a numeric constant? */
 636         if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
 637                 char *eptr;
 638                 yylval.num = strtol(token, &eptr, 10);
 639                 if (*eptr != 0)
 640                         yyerror("malformed number");
 641                 return (T_NUMBER);
 642         }
 643
 644         /*
 645          * A single lone character is treated as a character literal.
 646          * To avoid duplication of effort, we stick in the charmap.
 647          */
 648         if (len == 1) {
 649                 yylval.wc = token[0];
 650                 return (T_CHAR);
 651         }
 652
 653         /* anything else is treated as a symbolic name */
 654         yylval.token = strdup(token);
 655         token = NULL;
 656         toksz = 0;
 657         tokidx = 0;
 658         return (T_NAME);
 659 }
 660
 661 void
 662 scan_to_eol(void)
 663 {
 664         int     c;
 665         while ((c = scanc()) != '\n') {
 666                 if (c == EOF) {
 667                         /* end of file without newline! */
 668                         errf("missing newline");
 669                         return;
 670                 }
 671         }
 672         assert(c == '\n');
 673 }
 674
 675 int
 676 yylex(void)
 677 {
 678         int             c;
 679
 680         while ((c = scanc()) != EOF) {
 681
 682                 /* special handling for quoted string */
 683                 if (instring) {
 684                         if (escaped) {
 685                                 escaped = 0;
 686
 687                                 /* if newline, just eat and forget it */
 688                                 if (c == '\n')
 689                                         continue;
 690
 691                                 if (strchr("xXd01234567", c)) {
 692                                         unscanc(c);
 693                                         unscanc(esc_char);
 694                                         return (get_wide());
 695                                 }
 696                                 yylval.wc = get_escaped(c);
 697                                 return (T_CHAR);
 698                         }
 699                         if (c == esc_char) {
 700                                 escaped = 1;
 701                                 continue;
 702                         }
 703                         switch (c) {
 704                         case '<':
 705                                 return (get_symbol());
 706                         case '>':
 707                                 /* oops! should generate syntax error  */
 708                                 return (T_GT);
 709                         case '"':
 710                                 instring = 0;
 711                                 return (T_QUOTE);
 712                         default:
 713                                 yylval.wc = c;
 714                                 return (T_CHAR);
 715                         }
 716                 }
 717
 718                 /* escaped characters first */
 719                 if (escaped) {
 720                         escaped = 0;
 721                         if (c == '\n') {
 722                                 /* eat the newline */
 723                                 continue;
 724                         }
 725                         hadtok = 1;
 726                         if (tokidx) {
 727                                 /* an escape mid-token is nonsense */
 728                                 return (T_NULL);
 729                         }
 730
 731                         /* numeric escapes are treated as wide characters */
 732                         if (strchr("xXd01234567", c)) {
 733                                 unscanc(c);
 734                                 unscanc(esc_char);
 735                                 return (get_wide());
 736                         }
 737
 738                         add_tok(get_escaped(c));
 739                         continue;
 740                 }
 741
 742                 /* if it is the escape charter itself note it */
 743                 if (c == esc_char) {
 744                         escaped = 1;
 745                         continue;
 746                 }
 747
 748                 /* remove from the comment char to end of line */
 749                 if (c == com_char) {
 750                         while (c != '\n') {
 751                                 if ((c = scanc()) == EOF) {
 752                                         /* end of file without newline! */
 753                                         return (EOF);
 754                                 }
 755                         }
 756                         assert(c == '\n');
 757                         if (!hadtok) {
 758                                 /*
 759                                  * If there were no tokens on this line,
 760                                  * then just pretend it didn't exist at all.
 761                                  */
 762                                 continue;
 763                         }
 764                         hadtok = 0;
 765                         return (T_NL);
 766                 }
 767
 768                 if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
 769                         /*
 770                          * These are all token delimiters.  If there
 771                          * is a token already in progress, we need to
 772                          * process it.
 773                          */
 774                         unscanc(c);
 775                         return (consume_token());
 776                 }
 777
 778                 switch (c) {
 779                 case '\n':
 780                         if (!hadtok) {
 781                                 /*
 782                                  * If the line was completely devoid of tokens,
 783                                  * then just ignore it.
 784                                  */
 785                                 continue;
 786                         }
 787                         /* we're starting a new line, reset the token state */
 788                         hadtok = 0;
 789                         return (T_NL);
 790                 case ',':
 791                         hadtok = 1;
 792                         return (T_COMMA);
 793                 case ';':
 794                         hadtok = 1;
 795                         return (T_SEMI);
 796                 case '(':
 797                         hadtok = 1;
 798                         return (T_LPAREN);
 799                 case ')':
 800                         hadtok = 1;
 801                         return (T_RPAREN);
 802                 case '>':
 803                         hadtok = 1;
 804                         return (T_GT);
 805                 case '<':
 806                         /* symbol start! */
 807                         hadtok = 1;
 808                         return (get_symbol());
 809                 case ' ':
 810                 case '\t':
 811                         /* whitespace, just ignore it */
 812                         continue;
 813                 case '"':
 814                         hadtok = 1;
 815                         instring = 1;
 816                         return (T_QUOTE);
 817                 default:
 818                         hadtok = 1;
 819                         add_tok(c);
 820                         continue;
 821                 }
 822         }
 823         return (EOF);
 824 }
 825
 826 void
 827 yyerror(const char *msg)
 828 {
 829         (void) fprintf(stderr, "%s: %d: error: %s\n",
 830             filename, lineno, msg);
 831         exit(4);
 832 }
 833
 834 void
 835 errf(const char *fmt, ...)
 836 {
 837         char    *msg;
 838
 839         va_list va;
 840         va_start(va, fmt);
 841         (void) vasprintf(&msg, fmt, va);
 842         va_end(va);
 843
 844         (void) fprintf(stderr, "%s: %d: error: %s\n",
 845             filename, lineno, msg);
 846         free(msg);
 847         exit(4);
 848 }
 849
 850 void
 851 warn(const char *fmt, ...)
 852 {
 853         char    *msg;
 854
 855         va_list va;
 856         va_start(va, fmt);
 857         (void) vasprintf(&msg, fmt, va);
 858         va_end(va);
 859
 860         (void) fprintf(stderr, "%s: %d: warning: %s\n",
 861             filename, lineno, msg);
 862         free(msg);
 863         warnings++;
 864         if (!warnok)
 865                 exit(4);
 866 }