1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
30 #include "awkgram.tab.h"
32 extern YYSTYPE yylval
;
40 typedef struct Keyword
{
46 const Keyword keywords
[] = { /* keep sorted: binary searched */
47 { "BEGIN", XBEGIN
, XBEGIN
},
48 { "END", XEND
, XEND
},
49 { "NF", VARNF
, VARNF
},
50 { "atan2", FATAN
, BLTIN
},
51 { "break", BREAK
, BREAK
},
52 { "close", CLOSE
, CLOSE
},
53 { "continue", CONTINUE
, CONTINUE
},
54 { "cos", FCOS
, BLTIN
},
55 { "delete", DELETE
, DELETE
},
57 { "else", ELSE
, ELSE
},
58 { "exit", EXIT
, EXIT
},
59 { "exp", FEXP
, BLTIN
},
60 { "fflush", FFLUSH
, BLTIN
},
62 { "func", FUNC
, FUNC
},
63 { "function", FUNC
, FUNC
},
64 { "getline", GETLINE
, GETLINE
},
65 { "gsub", GSUB
, GSUB
},
68 { "index", INDEX
, INDEX
},
69 { "int", FINT
, BLTIN
},
70 { "length", FLENGTH
, BLTIN
},
71 { "log", FLOG
, BLTIN
},
72 { "match", MATCHFCN
, MATCHFCN
},
73 { "next", NEXT
, NEXT
},
74 { "nextfile", NEXTFILE
, NEXTFILE
},
75 { "print", PRINT
, PRINT
},
76 { "printf", PRINTF
, PRINTF
},
77 { "rand", FRAND
, BLTIN
},
78 { "return", RETURN
, RETURN
},
79 { "sin", FSIN
, BLTIN
},
80 { "split", SPLIT
, SPLIT
},
81 { "sprintf", SPRINTF
, SPRINTF
},
82 { "sqrt", FSQRT
, BLTIN
},
83 { "srand", FSRAND
, BLTIN
},
85 { "substr", SUBSTR
, SUBSTR
},
86 { "system", FSYSTEM
, BLTIN
},
87 { "tolower", FTOLOWER
, BLTIN
},
88 { "toupper", FTOUPPER
, BLTIN
},
89 { "while", WHILE
, WHILE
},
/*
 * RET(x): return token code x from the enclosing function.  When dbg is
 * nonzero, first print "lex <name>" using tokname(x).  Note that x is
 * evaluated twice when dbg is set, and that the expansion is a bare brace
 * block, so "RET(x);" leaves an extra empty statement after the block.
 */
92 #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
101 static int gettok(char **pbuf
, int *psz
) /* get next input token */
113 if (!isalnum(c
) && c
!= '.' && c
!= '_')
117 if (isalpha(c
) || c
== '_') { /* it's a varname */
118 for ( ; (c
= input()) != 0; ) {
120 if (!adjbuf(&buf
, &sz
, bp
-buf
+2, 100, &bp
, "gettok"))
121 FATAL( "out of space for name %.10s...", buf
);
122 if (isalnum(c
) || c
== '_')
131 retc
= 'a'; /* alphanumeric */
132 } else { /* maybe it's a number, but could be . */
134 /* read input until can't be a number */
135 for ( ; (c
= input()) != 0; ) {
137 if (!adjbuf(&buf
, &sz
, bp
-buf
+2, 100, &bp
, "gettok"))
138 FATAL( "out of space for number %.10s...", buf
);
139 if (isdigit(c
) || c
== 'e' || c
== 'E'
140 || c
== '.' || c
== '+' || c
== '-')
148 strtod(buf
, &rem
); /* parse the number */
149 if (rem
== buf
) { /* it wasn't a valid number at all */
150 buf
[1] = 0; /* return one character as token */
151 retc
= (uschar
)buf
[0]; /* character is its own type */
152 unputstr(rem
+1); /* put rest back for later */
153 } else { /* some prefix was a number */
154 unputstr(rem
); /* put rest back for later */
155 rem
[0] = 0; /* truncate buf after number part */
156 retc
= '0'; /* type is number */
167 bool sc
= false; /* true => return a } right now */
168 bool reg
= false; /* true => return a REGEXPR now */
173 static char *buf
= NULL
;
174 static int bufsize
= 5; /* BUG: setting this small causes core dump! */
176 if (buf
== NULL
&& (buf
= (char *) malloc(bufsize
)) == NULL
)
177 FATAL( "out of space in yylex" );
187 c
= gettok(&buf
, &bufsize
);
190 if (isalpha(c
) || c
== '_')
193 char *cp
= tostring(buf
);
196 if (is_number(cp
, & result
))
197 yylval
.cp
= setsymtab(buf
, cp
, result
, CON
|NUM
, symtab
);
199 yylval
.cp
= setsymtab(buf
, cp
, 0.0, STR
, symtab
);
201 /* should this also have STR set? */
207 case '\n': /* {EOL} */
210 case '\r': /* assume \n is coming */
211 case ' ': /* {WS}+ */
214 case '#': /* #.* strip comments */
215 while ((c
= input()) != '\n' && c
!= 0)
219 * Next line is a hack, it compensates for
220 * unput's treatment of \n.
227 if (peek() == '\n') {
230 } else if (peek() == '\r') {
231 input(); input(); /* \n */
249 input(); yylval
.i
= NE
; RET(NE
);
250 } else if (peek() == '~') {
251 input(); yylval
.i
= NOTMATCH
; RET(MATCHOP
);
259 input(); yylval
.i
= LE
; RET(LE
);
261 yylval
.i
= LT
; RET(LT
);
265 input(); yylval
.i
= EQ
; RET(EQ
);
267 yylval
.i
= ASSIGN
; RET(ASGNOP
);
271 input(); yylval
.i
= GE
; RET(GE
);
272 } else if (peek() == '>') {
273 input(); yylval
.i
= APPEND
; RET(APPEND
);
275 yylval
.i
= GT
; RET(GT
);
279 input(); yylval
.i
= INCR
; RET(INCR
);
280 } else if (peek() == '=') {
281 input(); yylval
.i
= ADDEQ
; RET(ASGNOP
);
286 input(); yylval
.i
= DECR
; RET(DECR
);
287 } else if (peek() == '=') {
288 input(); yylval
.i
= SUBEQ
; RET(ASGNOP
);
292 if (peek() == '=') { /* *= */
293 input(); yylval
.i
= MULTEQ
; RET(ASGNOP
);
294 } else if (peek() == '*') { /* ** or **= */
295 input(); /* eat 2nd * */
297 input(); yylval
.i
= POWEQ
; RET(ASGNOP
);
307 input(); yylval
.i
= MODEQ
; RET(ASGNOP
);
312 input(); yylval
.i
= POWEQ
; RET(ASGNOP
);
317 /* BUG: awkward, if not wrong */
318 c
= gettok(&buf
, &bufsize
);
320 if (strcmp(buf
, "NF") == 0) { /* very special */
325 if (c
== '(' || c
== '[' || (infunc
&& isarg(buf
) >= 0)) {
329 yylval
.cp
= setsymtab(buf
, "", 0.0, STR
|NUM
, symtab
);
331 } else if (c
== 0) { /* */
332 SYNTAX( "unexpected end of input after $" );
363 return string(); /* BUG: should be like tran.c ? */
375 static char *buf
= NULL
;
376 static int bufsz
= 500;
378 if (buf
== NULL
&& (buf
= (char *) malloc(bufsz
)) == NULL
)
379 FATAL("out of space for strings");
380 for (bp
= buf
; (c
= input()) != '"'; ) {
381 if (!adjbuf(&buf
, &bufsz
, bp
-buf
+2, 500, &bp
, "string"))
382 FATAL("out of space for string %.10s...", buf
);
388 SYNTAX( "non-terminated string %.10s...", buf
);
389 if (c
== 0) /* hopeless */
390 FATAL( "giving up" );
397 case '"': *bp
++ = '"'; break;
398 case 'n': *bp
++ = '\n'; break;
399 case 't': *bp
++ = '\t'; break;
400 case 'f': *bp
++ = '\f'; break;
401 case 'r': *bp
++ = '\r'; break;
402 case 'b': *bp
++ = '\b'; break;
403 case 'v': *bp
++ = '\v'; break;
404 case 'a': *bp
++ = '\a'; break;
405 case '\\': *bp
++ = '\\'; break;
407 case '0': case '1': case '2': /* octal: \d \dd \ddd */
408 case '3': case '4': case '5': case '6': case '7':
410 if ((c
= peek()) >= '0' && c
< '8') {
411 n
= 8 * n
+ input() - '0';
412 if ((c
= peek()) >= '0' && c
< '8')
413 n
= 8 * n
+ input() - '0';
418 case 'x': /* hex \x0-9a-fA-F + */
419 { char xbuf
[100], *px
;
420 for (px
= xbuf
; (c
= input()) != 0 && px
-xbuf
< 100-2; ) {
422 || (c
>= 'a' && c
<= 'f')
423 || (c
>= 'A' && c
<= 'F'))
430 sscanf(xbuf
, "%x", (unsigned int *) &n
);
447 *bp
++ = ' '; *bp
++ = '\0';
448 yylval
.cp
= setsymtab(buf
, s
, 0.0, CON
|STR
|DONTFREE
, symtab
);
454 static int binsearch(char *w
, const Keyword
*kp
, int n
)
456 int cond
, low
, mid
, high
;
460 while (low
<= high
) {
461 mid
= (low
+ high
) / 2;
462 if ((cond
= strcmp(w
, kp
[mid
].word
)) < 0)
477 n
= binsearch(w
, keywords
, sizeof(keywords
)/sizeof(keywords
[0]));
478 if (n
!= -1) { /* found in table */
481 switch (kp
->type
) { /* special handling */
483 if (kp
->sub
== FSYSTEM
&& safe
)
484 SYNTAX( "system is unsafe" );
488 SYNTAX( "illegal nested function" );
492 SYNTAX( "return not in function" );
495 yylval
.cp
= setsymtab("NF", "", 0.0, NUM
, symtab
);
501 c
= peek(); /* look for '(' */
502 if (c
!= '(' && infunc
&& (n
=isarg(w
)) >= 0) {
506 yylval
.cp
= setsymtab(w
, "", 0.0, STR
|NUM
|DONTFREE
, symtab
);
515 void startreg(void) /* next call to yylex will return a regular expression */
523 static char *buf
= NULL
;
524 static int bufsz
= 500;
527 if (buf
== NULL
&& (buf
= (char *) malloc(bufsz
)) == NULL
)
528 FATAL("out of space for reg expr");
530 for ( ; (c
= input()) != '/' && c
!= 0; ) {
531 if (!adjbuf(&buf
, &bufsz
, bp
-buf
+3, 500, &bp
, "regexpr"))
532 FATAL("out of space for reg expr %.10s...", buf
);
535 SYNTAX( "newline in regular expression %.10s...", buf
);
538 } else if (c
== '\\') {
547 SYNTAX("non-terminated regular expression %.10s...", buf
);
553 /* low-level lexical stuff, sort of inherited from lex */
557 char yysbuf
[100]; /* pushback buffer */
/*
 * Current position in yysbuf; starts at the beginning of the buffer.
 * input() takes pushed-back characters with *--yysptr, and unput() calls
 * FATAL once yysptr has reached yysbuf + sizeof(yysbuf).
 */
558 char *yysptr
= yysbuf
;
561 int input(void) /* get next lexical input character */
564 extern char *lexprog
;
567 c
= (uschar
)*--yysptr
;
568 else if (lexprog
!= NULL
) { /* awk '...' */
569 if ((c
= (uschar
)*lexprog
) != 0)
571 } else /* awk -f ... */
575 if (ep
>= ebuf
+ sizeof ebuf
)
584 void unput(int c
) /* put lexical character back on input */
588 if (yysptr
>= yysbuf
+ sizeof(yysbuf
))
589 FATAL("pushed back too much: %.20s...", yysbuf
);
592 ep
= ebuf
+ sizeof(ebuf
) - 1;
595 void unputstr(const char *s
) /* put a string back on input */
599 for (i
= strlen(s
)-1; i
>= 0; i
--)