("" < 3.4) always evaluates to true, which unconditionally
[dragonfly.git] / contrib / awk20040207 / lex.c
blob1766d71690ce16df858d17f81a6b1310ad270c96
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "ytab.h"
32 extern YYSTYPE yylval;
33 extern int infunc;
35 int lineno = 1;
36 int bracecnt = 0;
37 int brackcnt = 0;
38 int parencnt = 0;
40 typedef struct Keyword {
41 const char *word;
42 int sub;
43 int type;
44 } Keyword;
46 Keyword keywords[] ={ /* keep sorted: binary searched */
47 { "BEGIN", XBEGIN, XBEGIN },
48 { "END", XEND, XEND },
49 { "NF", VARNF, VARNF },
50 { "atan2", FATAN, BLTIN },
51 { "break", BREAK, BREAK },
52 { "close", CLOSE, CLOSE },
53 { "continue", CONTINUE, CONTINUE },
54 { "cos", FCOS, BLTIN },
55 { "delete", DELETE, DELETE },
56 { "do", DO, DO },
57 { "else", ELSE, ELSE },
58 { "exit", EXIT, EXIT },
59 { "exp", FEXP, BLTIN },
60 { "fflush", FFLUSH, BLTIN },
61 { "for", FOR, FOR },
62 { "func", FUNC, FUNC },
63 { "function", FUNC, FUNC },
64 { "getline", GETLINE, GETLINE },
65 { "gsub", GSUB, GSUB },
66 { "if", IF, IF },
67 { "in", IN, IN },
68 { "index", INDEX, INDEX },
69 { "int", FINT, BLTIN },
70 { "length", FLENGTH, BLTIN },
71 { "log", FLOG, BLTIN },
72 { "match", MATCHFCN, MATCHFCN },
73 { "next", NEXT, NEXT },
74 { "nextfile", NEXTFILE, NEXTFILE },
75 { "print", PRINT, PRINT },
76 { "printf", PRINTF, PRINTF },
77 { "rand", FRAND, BLTIN },
78 { "return", RETURN, RETURN },
79 { "sin", FSIN, BLTIN },
80 { "split", SPLIT, SPLIT },
81 { "sprintf", SPRINTF, SPRINTF },
82 { "sqrt", FSQRT, BLTIN },
83 { "srand", FSRAND, BLTIN },
84 { "sub", SUB, SUB },
85 { "substr", SUBSTR, SUBSTR },
86 { "system", FSYSTEM, BLTIN },
87 { "tolower", FTOLOWER, BLTIN },
88 { "toupper", FTOUPPER, BLTIN },
89 { "while", WHILE, WHILE },
/*
 * RET(x): return token x from the lexer.  With DEBUG defined the token
 * name is printed first when dbg is set.  The debug form is wrapped in
 * do { } while (0) so that "RET(x);" is a single statement and stays
 * correct in an unbraced if/else.  Note that x is expanded twice in the
 * debug form, so it must not have side effects.
 */
#define	DEBUG
#ifdef	DEBUG
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
#else
#define	RET(x)	return (x)
#endif
/* peek: report the next input character while leaving it unread. */
int peek(void)
{
	const int ahead = input();

	unput(ahead);	/* hand it straight back */
	return ahead;
}
/*
 * gettok: read the next raw token into the buffer *pbuf (capacity *psz).
 *
 * Returns 0 at end of input, 'a' for a name ([A-Za-z_][A-Za-z0-9_]*),
 * '0' for a numeric literal, and otherwise the single character read,
 * which is then its own token type.  The token text is left
 * NUL-terminated in the buffer.  The buffer may be grown through
 * adjbuf(); *pbuf and *psz are updated to match on the name/number paths.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep one spare byte for the terminator: the loop can
			 * end on c == 0 without passing through this check again.
			 */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be a lone '.' */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			if (bp-buf >= sz-1)	/* same spare byte as above */
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * The first character is returned as the token, so only
			 * what follows it goes back on the input; pushing the
			 * whole buffer back would make it be lexed again.
			 */
			unputstr(rem+1);
			buf[1] = 0;	/* so return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* so truncate where failure started */
			retc = '0';	/* number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
/* Helpers defined further down in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined (and cleared) at the top of yylex(). */
int	sc  = 0;	/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
174 int yylex(void)
176 int c;
177 static char *buf = 0;
178 static int bufsize = 500;
180 if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
181 FATAL( "out of space in yylex" );
182 if (sc) {
183 sc = 0;
184 RET('}');
186 if (reg) {
187 reg = 0;
188 return regexpr();
190 for (;;) {
191 c = gettok(&buf, &bufsize);
192 if (c == 0)
193 return 0;
194 if (isalpha(c) || c == '_')
195 return word(buf);
196 if (isdigit(c)) {
197 yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
198 /* should this also have STR set? */
199 RET(NUMBER);
202 yylval.i = c;
203 switch (c) {
204 case '\n': /* {EOL} */
205 RET(NL);
206 case '\r': /* assume \n is coming */
207 case ' ': /* {WS}+ */
208 case '\t':
209 break;
210 case '#': /* #.* strip comments */
211 while ((c = input()) != '\n' && c != 0)
213 unput(c);
214 break;
215 case ';':
216 RET(';');
217 case '\\':
218 if (peek() == '\n') {
219 input();
220 } else if (peek() == '\r') {
221 input(); input(); /* \n */
222 lineno++;
223 } else {
224 RET(c);
226 break;
227 case '&':
228 if (peek() == '&') {
229 input(); RET(AND);
230 } else
231 RET('&');
232 case '|':
233 if (peek() == '|') {
234 input(); RET(BOR);
235 } else
236 RET('|');
237 case '!':
238 if (peek() == '=') {
239 input(); yylval.i = NE; RET(NE);
240 } else if (peek() == '~') {
241 input(); yylval.i = NOTMATCH; RET(MATCHOP);
242 } else
243 RET(NOT);
244 case '~':
245 yylval.i = MATCH;
246 RET(MATCHOP);
247 case '<':
248 if (peek() == '=') {
249 input(); yylval.i = LE; RET(LE);
250 } else {
251 yylval.i = LT; RET(LT);
253 case '=':
254 if (peek() == '=') {
255 input(); yylval.i = EQ; RET(EQ);
256 } else {
257 yylval.i = ASSIGN; RET(ASGNOP);
259 case '>':
260 if (peek() == '=') {
261 input(); yylval.i = GE; RET(GE);
262 } else if (peek() == '>') {
263 input(); yylval.i = APPEND; RET(APPEND);
264 } else {
265 yylval.i = GT; RET(GT);
267 case '+':
268 if (peek() == '+') {
269 input(); yylval.i = INCR; RET(INCR);
270 } else if (peek() == '=') {
271 input(); yylval.i = ADDEQ; RET(ASGNOP);
272 } else
273 RET('+');
274 case '-':
275 if (peek() == '-') {
276 input(); yylval.i = DECR; RET(DECR);
277 } else if (peek() == '=') {
278 input(); yylval.i = SUBEQ; RET(ASGNOP);
279 } else
280 RET('-');
281 case '*':
282 if (peek() == '=') { /* *= */
283 input(); yylval.i = MULTEQ; RET(ASGNOP);
284 } else if (peek() == '*') { /* ** or **= */
285 input(); /* eat 2nd * */
286 if (peek() == '=') {
287 input(); yylval.i = POWEQ; RET(ASGNOP);
288 } else {
289 RET(POWER);
291 } else
292 RET('*');
293 case '/':
294 RET('/');
295 case '%':
296 if (peek() == '=') {
297 input(); yylval.i = MODEQ; RET(ASGNOP);
298 } else
299 RET('%');
300 case '^':
301 if (peek() == '=') {
302 input(); yylval.i = POWEQ; RET(ASGNOP);
303 } else
304 RET(POWER);
306 case '$':
307 /* BUG: awkward, if not wrong */
308 c = gettok(&buf, &bufsize);
309 if (isalpha(c)) {
310 if (strcmp(buf, "NF") == 0) { /* very special */
311 unputstr("(NF)");
312 RET(INDIRECT);
314 c = peek();
315 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
316 unputstr(buf);
317 RET(INDIRECT);
319 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
320 RET(IVAR);
321 } else if (c == 0) { /* */
322 SYNTAX( "unexpected end of input after $" );
323 RET(';');
324 } else {
325 unputstr(buf);
326 RET(INDIRECT);
329 case '}':
330 if (--bracecnt < 0)
331 SYNTAX( "extra }" );
332 sc = 1;
333 RET(';');
334 case ']':
335 if (--brackcnt < 0)
336 SYNTAX( "extra ]" );
337 RET(']');
338 case ')':
339 if (--parencnt < 0)
340 SYNTAX( "extra )" );
341 RET(')');
342 case '{':
343 bracecnt++;
344 RET('{');
345 case '[':
346 brackcnt++;
347 RET('[');
348 case '(':
349 parencnt++;
350 RET('(');
352 case '"':
353 return string(); /* BUG: should be like tran.c ? */
355 default:
356 RET(c);
361 int string(void)
363 int c, n;
364 char *s, *bp;
365 static char *buf = 0;
366 static int bufsz = 500;
368 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
369 FATAL("out of space for strings");
370 for (bp = buf; (c = input()) != '"'; ) {
371 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
372 FATAL("out of space for string %.10s...", buf);
373 switch (c) {
374 case '\n':
375 case '\r':
376 case 0:
377 SYNTAX( "non-terminated string %.10s...", buf );
378 lineno++;
379 if (c == 0) /* hopeless */
380 FATAL( "giving up" );
381 break;
382 case '\\':
383 c = input();
384 switch (c) {
385 case '"': *bp++ = '"'; break;
386 case 'n': *bp++ = '\n'; break;
387 case 't': *bp++ = '\t'; break;
388 case 'f': *bp++ = '\f'; break;
389 case 'r': *bp++ = '\r'; break;
390 case 'b': *bp++ = '\b'; break;
391 case 'v': *bp++ = '\v'; break;
392 case 'a': *bp++ = '\007'; break;
393 case '\\': *bp++ = '\\'; break;
395 case '0': case '1': case '2': /* octal: \d \dd \ddd */
396 case '3': case '4': case '5': case '6': case '7':
397 n = c - '0';
398 if ((c = peek()) >= '0' && c < '8') {
399 n = 8 * n + input() - '0';
400 if ((c = peek()) >= '0' && c < '8')
401 n = 8 * n + input() - '0';
403 *bp++ = n;
404 break;
406 case 'x': /* hex \x0-9a-fA-F + */
407 { char xbuf[100], *px;
408 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
409 if (isdigit(c)
410 || (c >= 'a' && c <= 'f')
411 || (c >= 'A' && c <= 'F'))
412 *px++ = c;
413 else
414 break;
416 *px = 0;
417 unput(c);
418 sscanf(xbuf, "%x", &n);
419 *bp++ = n;
420 break;
423 default:
424 *bp++ = c;
425 break;
427 break;
428 default:
429 *bp++ = c;
430 break;
433 *bp = 0;
434 s = tostring(buf);
435 *bp++ = ' '; *bp++ = 0;
436 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
437 RET(STRING);
441 int binsearch(char *w, Keyword *kp, int n)
443 int cond, low, mid, high;
445 low = 0;
446 high = n - 1;
447 while (low <= high) {
448 mid = (low + high) / 2;
449 if ((cond = strcmp(w, kp[mid].word)) < 0)
450 high = mid - 1;
451 else if (cond > 0)
452 low = mid + 1;
453 else
454 return mid;
456 return -1;
459 int word(char *w)
461 Keyword *kp;
462 int c, n;
464 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
465 kp = keywords + n;
466 if (n != -1) { /* found in table */
467 yylval.i = kp->sub;
468 switch (kp->type) { /* special handling */
469 case FSYSTEM:
470 if (safe)
471 SYNTAX( "system is unsafe" );
472 RET(kp->type);
473 case FUNC:
474 if (infunc)
475 SYNTAX( "illegal nested function" );
476 RET(kp->type);
477 case RETURN:
478 if (!infunc)
479 SYNTAX( "return not in function" );
480 RET(kp->type);
481 case VARNF:
482 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
483 RET(VARNF);
484 default:
485 RET(kp->type);
488 c = peek(); /* look for '(' */
489 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
490 yylval.i = n;
491 RET(ARG);
492 } else {
493 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
494 if (c == '(') {
495 RET(CALL);
496 } else {
497 RET(VAR);
502 void startreg(void) /* next call to yylex will return a regular expression */
504 reg = 1;
507 int regexpr(void)
509 int c;
510 static char *buf = 0;
511 static int bufsz = 500;
512 char *bp;
514 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
515 FATAL("out of space for rex expr");
516 bp = buf;
517 for ( ; (c = input()) != '/' && c != 0; ) {
518 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
519 FATAL("out of space for reg expr %.10s...", buf);
520 if (c == '\n') {
521 SYNTAX( "newline in regular expression %.10s...", buf );
522 unput('\n');
523 break;
524 } else if (c == '\\') {
525 *bp++ = '\\';
526 *bp++ = input();
527 } else {
528 *bp++ = c;
531 *bp = 0;
532 if (c == 0)
533 SYNTAX("non-terminated regular expression %.10s...", buf);
534 yylval.s = tostring(buf);
535 unput('/');
536 RET(REGEXPR);
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* ring of the characters most recently handed out by input() */
char	*ep = ebuf;		/* next slot in ebuf; wrapped by input() and unput() */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the newest pushed-back character */
FILE	*yyin = 0;
547 int input(void) /* get next lexical input character */
549 int c;
550 extern char *lexprog;
552 if (yysptr > yysbuf)
553 c = (uschar)*--yysptr;
554 else if (lexprog != NULL) { /* awk '...' */
555 if ((c = (uschar)*lexprog) != 0)
556 lexprog++;
557 } else /* awk -f ... */
558 c = pgetc();
559 if (c == '\n')
560 lineno++;
561 else if (c == EOF)
562 c = 0;
563 if (ep >= ebuf + sizeof ebuf)
564 ep = ebuf;
565 return *ep++ = c;
568 void unput(int c) /* put lexical character back on input */
570 if (c == '\n')
571 lineno--;
572 if (yysptr >= yysbuf + sizeof(yysbuf))
573 FATAL("pushed back too much: %.20s...", yysbuf);
574 *yysptr++ = c;
575 if (--ep < ebuf)
576 ep = ebuf + sizeof(ebuf) - 1;
/*
 * unputstr: push the whole string s back on the input.  Characters are
 * pushed last-to-first so that they are read back in their original
 * order; an empty string pushes nothing.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}