Sync usage with man page.
[netbsd-mini2440.git] / dist / nawk / run.c
blobd9ae9ce7e4837b4051dcf431844af489e089d319
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
29 #define DEBUG
30 #include <stdio.h>
31 #include <ctype.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <setjmp.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <time.h>
40 #include "awk.h"
41 #include "awkgram.h"
/* Macro form of tempfree: calls tfree(x) only when istemp(x) holds.  The
 * trailing `else` swallows the `;` that callers write after the call. */
43 #define tempfree(x) if (istemp(x)) tfree(x); else
45 void stdinit(void);
/* NOTE(review): in this listing the macro is removed again here and replaced
 * by the function below.  The listing's line numbering skips around this
 * spot, so confirm against the pristine file whether the function form is
 * really compiled or is commented out. */
48 #undef tempfree
/* Function form of tempfree: warns when an OCELL cell has a csub outside the
 * range CUNK..CFREE, then hands temporary cells to tfree(). */
50 void tempfree(Cell *p) {
51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
52 WARNING("bad csub %d in Cell %d %s",
53 p->csub, p->ctype, p->sval);
55 if (istemp(p))
56 tfree(p);
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /* */
67 /* #ifndef FOPEN_MAX */
68 /* #define FOPEN_MAX 40 */ /* max number of open files */
69 /* #endif */
70 /* */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
73 /* #endif */
/* Jump buffer armed by program() with setjmp() and taken by jump() on EXIT. */
75 jmp_buf env;
/* Per-range-pattern state, indexed by dopa2(); defined in another file. */
76 extern int pairstack[];
78 Node *winner = NULL; /* root of parse tree */
79 Cell *tmps; /* free temporary cells for execution */
/* Shared singleton cells.  Each exported pointer refers to one static Cell
 * that is returned by value-less operations (booleans and the jump kinds).
 * NOTE(review): the positional initializers presumably follow the member
 * order of Cell in awk.h (ctype, csub, nval, sval, fval, tval) -- confirm. */
81 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
82 Cell *True = &truecell;
83 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM };
84 Cell *False = &falsecell;
85 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM };
86 Cell *jbreak = &breakcell;
87 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM };
88 Cell *jcont = &contcell;
89 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM };
90 Cell *jnext = &nextcell;
91 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM };
92 Cell *jnextfile = &nextfilecell;
93 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM };
94 Cell *jexit = &exitcell;
95 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM };
96 Cell *jret = &retcell;
/* Template copied into every cell handed out by gettemp(). */
97 static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
99 Node *curnode = NULL; /* the node being executed, for debugging */
101 /* buffer memory management */
102 int adjbuf(uschar **pbuf, int *psiz, int minlen, int quantum, uschar **pbptr,
103 const char *whatrtn)
104 /* pbuf: address of pointer to buffer being managed
105 * psiz: address of buffer size variable
106 * minlen: minimum length of buffer needed
107 * quantum: buffer size quantum
108 * pbptr: address of movable pointer into buffer, or 0 if none
109 * whatrtn: name of the calling routine if failure should cause fatal error
111 * return 0 for realloc failure, !=0 for success
114 if (minlen > *psiz) {
115 char *tbuf;
116 int rminlen = quantum ? minlen % quantum : 0;
117 int boff = pbptr ? *pbptr - *pbuf : 0;
118 /* round up to next multiple of quantum */
119 if (rminlen)
120 minlen += quantum - rminlen;
121 tbuf = (char *) realloc(*pbuf, minlen);
122 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
123 if (tbuf == NULL) {
124 if (whatrtn)
125 FATAL("out of memory in %s", whatrtn);
126 return 0;
128 *pbuf = tbuf;
129 *psiz = minlen;
130 if (pbptr)
131 *pbptr = tbuf + boff;
133 return 1;
136 void run(Node *a) /* execution of parse tree starts here */
138 stdinit();
139 execute(a);
140 closeall();
143 Cell *execute(Node *u) /* execute a node of the parse tree */
145 Cell *(*proc)(Node **, int);
146 Cell *x;
147 Node *a;
149 if (u == NULL)
150 return(True);
151 for (a = u; ; a = a->nnext) {
152 curnode = a;
153 if (isvalue(a)) {
154 x = (Cell *) (a->narg[0]);
155 if (isfld(x) && !donefld)
156 fldbld();
157 else if (isrec(x) && !donerec)
158 recbld();
159 return(x);
161 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
162 FATAL("illegal statement");
163 proc = proctab[a->nobj-FIRSTTOKEN];
164 x = (*proc)(a->narg, a->nobj);
165 if (isfld(x) && !donefld)
166 fldbld();
167 else if (isrec(x) && !donerec)
168 recbld();
169 if (isexpr(a))
170 return(x);
171 if (isjump(x))
172 return(x);
173 if (a->nnext == NULL)
174 return(x);
175 tempfree(x);
/*
 * program: run the three phases of an awk program -- the BEGIN action, the
 * main body once per record returned by getrec(), and the END action.
 * Always returns True.
 */
180 Cell *program(Node **a, int n) /* execute an awk program */
181 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
182 Cell *x;
/* jump() does longjmp(env, 1) for EXIT; a non-zero return here lands us
 * at label `ex`, skipping the rest of BEGIN and the main loop.
 * NOTE(review): the `ex` label itself is not visible in this listing. */
184 if (setjmp(env) != 0)
185 goto ex;
186 if (a[0]) { /* BEGIN */
187 x = execute(a[0]);
188 if (isexit(x))
189 return(True);
190 if (isjump(x))
191 FATAL("illegal break, continue, next or nextfile from BEGIN");
192 tempfree(x);
/* records are only read when there is a body or an END action */
194 if (a[1] || a[2])
195 while (getrec(&record, &recsize, 1) > 0) {
196 x = execute(a[1]);
197 if (isexit(x))
198 break;
199 tempfree(x);
/* env is re-armed so that an exit inside END jumps straight to ex1 */
202 if (setjmp(env) != 0) /* handles exit within END */
203 goto ex1;
204 if (a[2]) { /* END */
205 x = execute(a[2]);
206 if (isbreak(x) || isnext(x) || iscont(x))
207 FATAL("illegal break, continue, next or nextfile from END");
208 tempfree(x);
210 ex1:
211 return(True);
/* One Frame is pushed by call() for every active awk function invocation. */
214 struct Frame { /* stack frame for awk function calls */
215 int nargs; /* number of arguments in this call */
216 Cell *fcncell; /* pointer to Cell for function */
217 Cell **args; /* pointer to array of arguments after execute */
218 Cell *retval; /* return value */
/* call() refuses invocations where actual + declared argument counts exceed NARGS. */
221 #define NARGS 50 /* max args in a call */
/* The frame stack is allocated lazily by call() and grown 100 frames at a time. */
223 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
224 int nframe = 0; /* number of frames allocated */
225 struct Frame *fp = NULL; /* frame pointer. bottom level unused */
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's Cell (its body is kept in fcn->sval and
 * its declared parameter count in fcn->fval); a[1] is the list of actual
 * argument expressions.  Actual arguments are evaluated into args[] (arrays
 * by reference, everything else through copycell()), missing parameters are
 * filled with fresh temporaries, a Frame is pushed, the body is executed and
 * the frame's retval cell is returned.
 */
227 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
229 static const Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
230 int i, ncall, ndef;
231 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
232 Node *x;
233 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
234 Cell *y, *z, *fcn;
235 char *s;
237 fcn = execute(a[0]); /* the function itself */
238 s = fcn->nval;
239 if (!isfcn(fcn))
240 FATAL("calling undefined function %s", s);
/* first call ever: allocate the initial block of 100 frames */
241 if (frame == NULL) {
242 fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
243 if (frame == NULL)
244 FATAL("out of space for stack frames calling %s", s);
246 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
247 ncall++;
248 ndef = (int) fcn->fval; /* args in defn */
249 dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
250 if (ncall > ndef)
251 WARNING("function %s called with %d args, uses only %d",
252 s, ncall, ndef);
/* guards the fixed-size args[]/oargs[] arrays declared above */
253 if (ncall + ndef > NARGS)
254 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
255 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
256 dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
257 y = execute(x);
258 oargs[i] = y;
259 dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
260 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
261 if (isfcn(y))
262 FATAL("can't use function %s as argument in %s", y->nval, s);
263 if (isarr(y))
264 args[i] = y; /* arrays by ref */
265 else
266 args[i] = copycell(y);
267 tempfree(y);
269 for ( ; i < ndef; i++) { /* add null args for ones not provided */
270 args[i] = gettemp();
271 *args[i] = newcopycell;
273 fp++; /* now ok to up frame */
/* grow the frame stack by another 100 entries; fp is re-derived from its
 * index because realloc may move the block */
274 if (fp >= frame + nframe) {
275 int dfp = fp - frame; /* old index */
276 frame = (struct Frame *)
277 realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
278 if (frame == NULL)
279 FATAL("out of space for stack frames in %s", s);
280 fp = frame + dfp;
282 fp->fcncell = fcn;
283 fp->args = args;
284 fp->nargs = ndef; /* number defined with (excess are locals) */
285 fp->retval = gettemp();
287 dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
288 y = execute((Node *)(fcn->sval)); /* execute body */
289 dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
/* release the parameter cells.  A CCOPY parameter that became an array
 * inside the function is either freed (it was a local, i >= ncall) or its
 * table is handed back to the caller's original cell in oargs[i]. */
291 for (i = 0; i < ndef; i++) {
292 Cell *t = fp->args[i];
293 if (isarr(t)) {
294 if (t->csub == CCOPY) {
295 if (i >= ncall) {
296 freesymtab(t);
297 t->csub = CTEMP;
298 tempfree(t);
299 } else {
300 oargs[i]->tval = t->tval;
301 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
302 oargs[i]->sval = t->sval;
303 tempfree(t);
306 } else if (t != y) { /* kludge to prevent freeing twice */
307 t->csub = CTEMP;
308 tempfree(t);
309 } else if (t == y && t->csub == CCOPY) {
310 t->csub = CTEMP;
311 tempfree(t);
312 freed = 1;
315 tempfree(fcn);
/* NOTE(review): this early return leaves fp pointing at this frame; the
 * matching fp-- only happens on the normal path below. */
316 if (isexit(y) || isnext(y))
317 return y;
318 if (freed == 0) {
319 tempfree(y); /* don't free twice! */
321 z = fp->retval; /* return value */
322 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
323 fp--;
324 return(z);
327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
329 Cell *y;
331 /* copy is not constant or field */
333 y = gettemp();
334 y->tval = x->tval & ~(CON|FLD|REC);
335 y->csub = CCOPY; /* prevents freeing until call is over */
336 y->nval = x->nval; /* BUG? */
337 if (isstr(x) /* || x->ctype == OCELL */) {
338 y->sval = tostring(x->sval);
339 y->tval &= ~DONTFREE;
340 } else
341 y->tval |= DONTFREE;
342 y->fval = x->fval;
343 return y;
346 Cell *arg(Node **a, int n) /* nth argument of a function */
349 n = ptoi(a[0]); /* argument number, counting from 0 */
350 dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
351 if (n+1 > fp->nargs)
352 FATAL("argument #%d of function %s was not supplied",
353 n+1, fp->fcncell->nval);
354 return fp->args[n];
357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
359 Cell *y;
361 switch (n) {
362 case EXIT:
363 if (a[0] != NULL) {
364 y = execute(a[0]);
365 errorflag = (int) getfval(y);
366 tempfree(y);
368 longjmp(env, 1);
369 case RETURN:
370 if (a[0] != NULL) {
371 y = execute(a[0]);
372 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
373 setsval(fp->retval, getsval(y));
374 fp->retval->fval = getfval(y);
375 fp->retval->tval |= NUM;
377 else if (y->tval & STR)
378 setsval(fp->retval, getsval(y));
379 else if (y->tval & NUM)
380 setfval(fp->retval, getfval(y));
381 else /* can't happen */
382 FATAL("bad type variable %d", y->tval);
383 tempfree(y);
385 return(jret);
386 case NEXT:
387 return(jnext);
388 case NEXTFILE:
389 nextfile();
390 return(jnextfile);
391 case BREAK:
392 return(jbreak);
393 case CONTINUE:
394 return(jcont);
395 default: /* can't happen */
396 FATAL("illegal jump type %d", n);
398 return 0; /* not reached */
401 Cell *get_line(Node **a, int n) /* get next line from specific input */
402 { /* a[0] is variable, a[1] is operator, a[2] is filename */
403 Cell *r, *x;
404 extern Cell **fldtab;
405 FILE *fp;
406 uschar *buf;
407 int bufsize = recsize;
408 int mode;
410 if ((buf = (char *) malloc(bufsize)) == NULL)
411 FATAL("out of memory in get_line");
413 fflush(stdout); /* in case someone is waiting for a prompt */
414 r = gettemp();
415 if (a[1] != NULL) { /* get_line < file */
416 x = execute(a[2]); /* filename */
417 mode = ptoi(a[1]);
418 if (mode == '|') /* input pipe */
419 mode = LE; /* arbitrary flag */
420 fp = openfile(mode, getsval(x));
421 tempfree(x);
422 if (fp == NULL)
423 n = -1;
424 else
425 n = readrec(&buf, &bufsize, fp);
426 if (n <= 0) {
428 } else if (a[0] != NULL) { /* get_line var <file */
429 x = execute(a[0]);
430 setsval(x, buf);
431 tempfree(x);
432 } else { /* get_line <file */
433 setsval(fldtab[0], buf);
434 if (is_number(fldtab[0]->sval)) {
435 fldtab[0]->fval = atof(fldtab[0]->sval);
436 fldtab[0]->tval |= NUM;
439 } else { /* bare get_line; use current input */
440 if (a[0] == NULL) /* get_line */
441 n = getrec(&record, &recsize, 1);
442 else { /* get_line var */
443 n = getrec(&buf, &bufsize, 0);
444 x = execute(a[0]);
445 setsval(x, buf);
446 tempfree(x);
449 setfval(r, (Awkfloat) n);
450 free(buf);
451 return r;
454 Cell *getnf(Node **a, int n) /* get NF */
456 if (donefld == 0)
457 fldbld();
458 return (Cell *) a[0];
461 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
463 Cell *x, *y, *z;
464 char *s;
465 Node *np;
466 uschar *buf;
467 int bufsz = recsize;
468 int nsub = strlen(*SUBSEP);
470 if ((buf = (char *) malloc(bufsz)) == NULL)
471 FATAL("out of memory in array");
473 x = execute(a[0]); /* Cell* for symbol table */
474 buf[0] = 0;
475 for (np = a[1]; np; np = np->nnext) {
476 y = execute(np); /* subscript */
477 s = getsval(y);
478 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
479 FATAL("out of memory for %s[%s...]", x->nval, buf);
480 strlcat(buf, s, bufsz);
481 if (np->nnext)
482 strlcat(buf, *SUBSEP, bufsz);
483 tempfree(y);
485 if (!isarr(x)) {
486 dprintf( ("making %s into an array\n", NN(x->nval)) );
487 if (freeable(x))
488 xfree(x->sval);
489 x->tval &= ~(STR|NUM|DONTFREE);
490 x->tval |= ARR;
491 x->sval = (char *) makesymtab(NSYMTAB);
493 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
494 z->ctype = OCELL;
495 z->csub = CVAR;
496 tempfree(x);
497 free(buf);
498 return(z);
501 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
503 Cell *x, *y;
504 Node *np;
505 uschar *s;
506 int nsub = strlen(*SUBSEP);
508 x = execute(a[0]); /* Cell* for symbol table */
509 if (!isarr(x))
510 return True;
511 if (a[1] == 0) { /* delete the elements, not the table */
512 freesymtab(x);
513 x->tval &= ~STR;
514 x->tval |= ARR;
515 x->sval = (char *) makesymtab(NSYMTAB);
516 } else {
517 int bufsz = recsize;
518 uschar *buf;
519 if ((buf = malloc(bufsz)) == NULL)
520 FATAL("out of memory in adelete");
521 buf[0] = 0;
522 for (np = a[1]; np; np = np->nnext) {
523 y = execute(np); /* subscript */
524 s = getsval(y);
525 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
526 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
527 strlcat(buf, s, bufsz);
528 if (np->nnext)
529 strlcat(buf, *SUBSEP, bufsz);
530 tempfree(y);
532 freeelem(x, buf);
533 free(buf);
535 tempfree(x);
536 return True;
539 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
541 Cell *x, *ap, *k;
542 Node *p;
543 uschar *buf;
544 char *s;
545 int bufsz = recsize;
546 int nsub = strlen(*SUBSEP);
548 ap = execute(a[1]); /* array name */
549 if (!isarr(ap)) {
550 dprintf( ("making %s into an array\n", ap->nval) );
551 if (freeable(ap))
552 xfree(ap->sval);
553 ap->tval &= ~(STR|NUM|DONTFREE);
554 ap->tval |= ARR;
555 ap->sval = (char *) makesymtab(NSYMTAB);
557 if ((buf = malloc(bufsz)) == NULL) {
558 FATAL("out of memory in intest");
560 buf[0] = 0;
561 for (p = a[0]; p; p = p->nnext) {
562 x = execute(p); /* expr */
563 s = getsval(x);
564 if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
565 FATAL("out of memory deleting %s[%s...]", x->nval, buf);
566 strcat(buf, s);
567 tempfree(x);
568 if (p->nnext)
569 strcat(buf, *SUBSEP);
571 k = lookup(buf, (Array *) ap->sval);
572 tempfree(ap);
573 free(buf);
574 if (k == NULL)
575 return(False);
576 else
577 return(True);
581 Cell *matchop(Node **a, int n) /* ~ and match() */
583 Cell *x, *y;
584 uschar *s;
585 char *t;
586 int i;
587 fa *pfa;
588 int (*mf)(fa *, const char *) = match, mode = 0;
590 if (n == MATCHFCN) {
591 mf = pmatch;
592 mode = 1;
594 x = execute(a[1]); /* a[1] = target text */
595 s = getsval(x);
596 if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */
597 i = (*mf)((fa *) a[2], s);
598 else {
599 y = execute(a[2]); /* a[2] = regular expr */
600 t = getsval(y);
601 pfa = makedfa(t, mode);
602 i = (*mf)(pfa, s);
603 tempfree(y);
605 tempfree(x);
606 if (n == MATCHFCN) {
607 int start = patbeg - s + 1;
608 if (patlen < 0)
609 start = 0;
610 setfval(rstartloc, (Awkfloat) start);
611 setfval(rlengthloc, (Awkfloat) patlen);
612 x = gettemp();
613 x->tval = NUM;
614 x->fval = start;
615 return x;
616 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
617 return(True);
618 else
619 return(False);
623 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
625 Cell *x, *y;
626 int i;
628 x = execute(a[0]);
629 i = istrue(x);
630 tempfree(x);
631 switch (n) {
632 case BOR:
633 if (i) return(True);
634 y = execute(a[1]);
635 i = istrue(y);
636 tempfree(y);
637 if (i) return(True);
638 else return(False);
639 case AND:
640 if ( !i ) return(False);
641 y = execute(a[1]);
642 i = istrue(y);
643 tempfree(y);
644 if (i) return(True);
645 else return(False);
646 case NOT:
647 if (i) return(False);
648 else return(True);
649 default: /* can't happen */
650 FATAL("unknown boolean operator %d", n);
652 return 0; /*NOTREACHED*/
655 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
657 int i;
658 Cell *x, *y;
659 Awkfloat j;
661 x = execute(a[0]);
662 y = execute(a[1]);
663 if (x->tval&NUM && y->tval&NUM) {
664 j = x->fval - y->fval;
665 i = j<0? -1: (j>0? 1: 0);
666 } else {
667 i = strcmp(getsval(x), getsval(y));
669 tempfree(x);
670 tempfree(y);
671 switch (n) {
672 case LT: if (i<0) return(True);
673 else return(False);
674 case LE: if (i<=0) return(True);
675 else return(False);
676 case NE: if (i!=0) return(True);
677 else return(False);
678 case EQ: if (i == 0) return(True);
679 else return(False);
680 case GE: if (i>=0) return(True);
681 else return(False);
682 case GT: if (i>0) return(True);
683 else return(False);
684 default: /* can't happen */
685 FATAL("unknown relational operator %d", n);
687 return 0; /*NOTREACHED*/
690 void tfree(Cell *a) /* free a tempcell */
692 if (freeable(a)) {
693 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
694 xfree(a->sval);
696 if (a == tmps)
697 FATAL("tempcell list is curdled");
698 a->cnext = tmps;
699 tmps = a;
702 Cell *gettemp(void) /* get a tempcell */
703 { int i;
704 Cell *x;
706 if (!tmps) {
707 tmps = (Cell *) calloc(100, sizeof(Cell));
708 if (!tmps)
709 FATAL("out of space for temporaries");
710 for(i = 1; i < 100; i++)
711 tmps[i-1].cnext = &tmps[i];
712 tmps[i-1].cnext = 0;
714 x = tmps;
715 tmps = x->cnext;
716 *x = tempcell;
717 return(x);
720 Cell *indirect(Node **a, int n) /* $( a[0] ) */
722 Awkfloat val;
723 Cell *x;
724 int m;
725 char *s;
727 x = execute(a[0]);
728 val = getfval(x); /* freebsd: defend against super large field numbers */
729 if ((Awkfloat)INT_MAX < val)
730 FATAL("trying to access out of range field %s", x->nval);
731 m = (int) val;
732 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
733 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
734 /* BUG: can x->nval ever be null??? */
735 tempfree(x);
736 x = fieldadr(m);
737 x->ctype = OCELL; /* BUG? why are these needed? */
738 x->csub = CFLD;
739 return(x);
742 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
744 int k, m, n;
745 char *s;
746 int temp;
747 Cell *x, *y, *z = 0;
749 x = execute(a[0]);
750 y = execute(a[1]);
751 if (a[2] != 0)
752 z = execute(a[2]);
753 s = getsval(x);
754 k = strlen(s) + 1;
755 if (k <= 1) {
756 tempfree(x);
757 tempfree(y);
758 if (a[2] != 0) {
759 tempfree(z);
761 x = gettemp();
762 setsval(x, "");
763 return(x);
765 m = (int) getfval(y);
766 if (m <= 0)
767 m = 1;
768 else if (m > k)
769 m = k;
770 tempfree(y);
771 if (a[2] != 0) {
772 n = (int) getfval(z);
773 tempfree(z);
774 } else
775 n = k - 1;
776 if (n < 0)
777 n = 0;
778 else if (n > k - m)
779 n = k - m;
780 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
781 y = gettemp();
782 temp = s[n+m-1]; /* with thanks to John Linderman */
783 s[n+m-1] = '\0';
784 setsval(y, s + m - 1);
785 s[n+m-1] = temp;
786 tempfree(x);
787 return(y);
790 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
792 Cell *x, *y, *z;
793 char *s1, *s2, *p1, *p2, *q;
794 Awkfloat v = 0.0;
796 x = execute(a[0]);
797 s1 = getsval(x);
798 y = execute(a[1]);
799 s2 = getsval(y);
801 z = gettemp();
802 for (p1 = s1; *p1 != '\0'; p1++) {
803 for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
805 if (*p2 == '\0') {
806 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
807 break;
810 tempfree(x);
811 tempfree(y);
812 setfval(z, v);
813 return(z);
/* Room reserved in the output buffer for one converted number. */
816 #define MAXNUMSIZE 50
/*
 * format: expand the printf-style format string s, taking one argument per
 * conversion from the Node list a, into the buffer *pbuf.  The buffer is
 * grown with adjbuf() as needed; on return *pbuf and *pbufsize describe the
 * (possibly reallocated) buffer and the return value is the number of bytes
 * written (p - buf), which may include embedded NUL bytes from %c.
 */
818 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
820 uschar *fmt, *p, *t;
821 const char *os;
822 Cell *x;
823 int flag = 0, n;
824 int fmtwd; /* format width */
825 int fmtsz = recsize;
826 uschar *buf = *pbuf;
827 int bufsize = *pbufsize;
/* space left in fmt / buf starting at pointer a */
828 #define FMTSZ(a) (fmtsz - ((a) - fmt))
829 #define BUFSZ(a) (bufsize - ((a) - buf))
831 os = s;
832 p = buf;
833 if ((fmt = (char *) malloc(fmtsz)) == NULL)
834 FATAL("out of memory in format()");
835 while (*s) {
836 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
/* ordinary characters and "%%" are copied straight to the output */
837 if (*s != '%') {
838 *p++ = *s++;
839 continue;
841 if (*(s+1) == '%') {
842 *p++ = '%';
843 s += 2;
844 continue;
846 /* have to be real careful in case this is a huge number, eg, %100000d */
847 fmtwd = atoi(s+1);
848 if (fmtwd < 0)
849 fmtwd = -fmtwd;
850 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
/* copy one conversion specification into fmt, stopping at its letter */
851 for (t = fmt; (*t++ = *s) != '\0'; s++) {
852 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
853 FATAL("format item %.30s... ran format() out of memory", os);
854 if (*s == 'l' || *s == 'h' || *s == 'L')
855 goto weird;
856 if (isalpha((uschar)*s))
857 break; /* the ansi panoply */
/* '*' consumes an argument and is replaced in fmt by its decimal value */
858 if (*s == '*') {
859 if (a == NULL)
860 FATAL("not enough args in printf("
861 "\"%.30s\")", os);
862 x = execute(a);
863 a = a->nnext;
864 snprintf(t - 1, FMTSZ(t - 1),
865 "%d", fmtwd=(int) getfval(x));
866 if (fmtwd < 0)
867 fmtwd = -fmtwd;
868 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
869 t = fmt + strlen(fmt);
870 tempfree(x);
873 *t = '\0';
874 if (fmtwd < 0)
875 fmtwd = -fmtwd;
876 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
/* classify the conversion letter; flag selects how the argument is fetched */
878 switch (*s) {
879 case 'f': case 'e': case 'g': case 'E': case 'G':
880 flag = 'f';
881 break;
882 case 'd': case 'i':
883 flag = 'd';
884 if(*(s-1) == 'l') break;
/* rewrite the tail of fmt to "ld" so the value can be passed as long */
885 *(t-1) = 'l';
886 *t = 'd';
887 *++t = '\0';
888 break;
889 case 'o': case 'x': case 'X': case 'u':
890 flag = *(s-1) == 'l' ? 'd' : 'u';
891 break;
892 case 's':
893 flag = 's';
894 break;
895 case 'c':
896 flag = 'c';
897 break;
898 default:
899 weird:
900 WARNING("weird printf conversion %s", fmt);
901 flag = '?';
902 break;
904 if (a == NULL)
905 FATAL("not enough args in printf(%s)", os);
906 x = execute(a);
907 a = a->nnext;
908 n = MAXNUMSIZE;
909 if (fmtwd > n)
910 n = fmtwd;
911 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
912 switch (flag) {
913 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
914 t = getsval(x);
915 n = strlen(t);
916 if (fmtwd > n)
917 n = fmtwd;
918 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
919 p += strlen(p);
920 snprintf(p, BUFSZ(p), "%s", t);
921 break;
922 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
923 case 'd': snprintf(p, BUFSZ(p), fmt, (long) getfval(x)); break;
924 case 'u': snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); break;
925 case 's':
926 t = getsval(x);
927 n = strlen(t);
928 if (fmtwd > n)
929 n = fmtwd;
930 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
931 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
932 snprintf(p, BUFSZ(p), fmt, t);
933 break;
/* %c: a numeric argument is a character code (0 emits a NUL byte);
 * a string argument contributes its first character */
934 case 'c':
935 if (isnum(x)) {
936 if (getfval(x))
937 snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
938 else {
939 *p++ = '\0'; /* explicit null byte */
940 *p = '\0'; /* next output will start here */
942 } else
943 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
944 break;
945 default:
946 FATAL("can't happen: bad conversion %c in format()", flag);
948 tempfree(x);
949 p += strlen(p);
950 s++;
952 *p = '\0';
953 free(fmt);
/* NOTE(review): the cells returned here are not passed to tempfree(). */
954 for ( ; a; a = a->nnext) /* evaluate any remaining args */
955 execute(a);
956 *pbuf = buf;
957 *pbufsize = bufsize;
958 return p - buf;
961 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
963 Cell *x;
964 Node *y;
965 char *buf;
966 int bufsz=3*recsize;
968 if ((buf = (char *) malloc(bufsz)) == NULL)
969 FATAL("out of memory in awksprintf");
970 y = a[0]->nnext;
971 x = execute(a[0]);
972 if (format(&buf, &bufsz, getsval(x), y) == -1)
973 FATAL("sprintf string %.30s... too long. can't happen.", buf);
974 tempfree(x);
975 x = gettemp();
976 x->sval = buf;
977 x->tval = STR;
978 return(x);
981 Cell *awkprintf(Node **a, int n) /* printf */
982 { /* a[0] is list of args, starting with format string */
983 /* a[1] is redirection operator, a[2] is redirection file */
984 FILE *fp;
985 Cell *x;
986 Node *y;
987 char *buf;
988 int len;
989 int bufsz=3*recsize;
991 if ((buf = (char *) malloc(bufsz)) == NULL)
992 FATAL("out of memory in awkprintf");
993 y = a[0]->nnext;
994 x = execute(a[0]);
995 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
996 FATAL("printf string %.30s... too long. can't happen.", buf);
997 tempfree(x);
998 if (a[1] == NULL) {
999 /* fputs(buf, stdout); */
1000 fwrite(buf, len, 1, stdout);
1001 if (ferror(stdout))
1002 FATAL("write error on stdout");
1003 } else {
1004 fp = redirect(ptoi(a[1]), a[2]);
1005 /* fputs(buf, fp); */
1006 fwrite(buf, len, 1, fp);
1007 fflush(fp);
1008 if (ferror(fp))
1009 FATAL("write error on %s", filename(fp));
1011 free(buf);
1012 return(True);
1015 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1017 Awkfloat i, j = 0;
1018 double v;
1019 Cell *x, *y, *z;
1021 x = execute(a[0]);
1022 i = getfval(x);
1023 tempfree(x);
1024 if (n != UMINUS) {
1025 y = execute(a[1]);
1026 j = getfval(y);
1027 tempfree(y);
1029 z = gettemp();
1030 switch (n) {
1031 case ADD:
1032 i += j;
1033 break;
1034 case MINUS:
1035 i -= j;
1036 break;
1037 case MULT:
1038 i *= j;
1039 break;
1040 case DIVIDE:
1041 if (j == 0)
1042 FATAL("division by zero");
1043 i /= j;
1044 break;
1045 case MOD:
1046 if (j == 0)
1047 FATAL("division by zero in mod");
1048 modf(i/j, &v);
1049 i = i - j * v;
1050 break;
1051 case UMINUS:
1052 i = -i;
1053 break;
1054 case POWER:
1055 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1056 i = ipow(i, (int) j);
1057 else
1058 i = errcheck(pow(i, j), "pow");
1059 break;
1060 default: /* can't happen */
1061 FATAL("illegal arithmetic operator %d", n);
1063 setfval(z, i);
1064 return(z);
/*
 * ipow: x**n.  ought to be done by pow, but isn't always.
 *
 * Computes the power by repeated squaring.  Any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;
	half = ipow(x, n/2);
	return (n % 2 == 0) ? half * half : x * half * half;
}
1080 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1082 Cell *x, *z;
1083 int k;
1084 Awkfloat xf;
1086 x = execute(a[0]);
1087 xf = getfval(x);
1088 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1089 if (n == PREINCR || n == PREDECR) {
1090 setfval(x, xf + k);
1091 return(x);
1093 z = gettemp();
1094 setfval(z, xf);
1095 setfval(x, xf + k);
1096 tempfree(x);
1097 return(z);
1100 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1101 { /* this is subtle; don't muck with it. */
1102 Cell *x, *y;
1103 Awkfloat xf, yf;
1104 double v;
1106 y = execute(a[1]);
1107 x = execute(a[0]);
1108 if (n == ASSIGN) { /* ordinary assignment */
1109 if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */
1110 ; /* leave alone unless it's a field */
1111 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1112 setsval(x, getsval(y));
1113 x->fval = getfval(y);
1114 x->tval |= NUM;
1116 else if (isstr(y))
1117 setsval(x, getsval(y));
1118 else if (isnum(y))
1119 setfval(x, getfval(y));
1120 else
1121 funnyvar(y, "read value of");
1122 tempfree(y);
1123 return(x);
1125 xf = getfval(x);
1126 yf = getfval(y);
1127 switch (n) {
1128 case ADDEQ:
1129 xf += yf;
1130 break;
1131 case SUBEQ:
1132 xf -= yf;
1133 break;
1134 case MULTEQ:
1135 xf *= yf;
1136 break;
1137 case DIVEQ:
1138 if (yf == 0)
1139 FATAL("division by zero in /=");
1140 xf /= yf;
1141 break;
1142 case MODEQ:
1143 if (yf == 0)
1144 FATAL("division by zero in %%=");
1145 modf(xf/yf, &v);
1146 xf = xf - yf * v;
1147 break;
1148 case POWEQ:
1149 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1150 xf = ipow(xf, (int) yf);
1151 else
1152 xf = errcheck(pow(xf, yf), "pow");
1153 break;
1154 default:
1155 FATAL("illegal assignment operator %d", n);
1156 break;
1158 tempfree(y);
1159 setfval(x, xf);
1160 return(x);
1163 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1165 Cell *x, *y, *z;
1166 int n1, n2;
1167 char *s;
1169 x = execute(a[0]);
1170 y = execute(a[1]);
1171 getsval(x);
1172 getsval(y);
1173 n1 = strlen(x->sval);
1174 n2 = strlen(y->sval);
1175 s = (char *) malloc(n1 + n2 + 1);
1176 if (s == NULL)
1177 FATAL("out of space concatenating %.15s... and %.15s...",
1178 x->sval, y->sval);
1179 strcpy(s, x->sval);
1180 strcpy(s+n1, y->sval);
1181 tempfree(y);
1182 z = gettemp();
1183 z->sval = s;
1184 z->tval = STR;
1185 tempfree(x);
1186 return(z);
1189 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1191 Cell *x;
1193 if (a[0] == 0)
1194 x = execute(a[1]);
1195 else {
1196 x = execute(a[0]);
1197 if (istrue(x)) {
1198 tempfree(x);
1199 x = execute(a[1]);
1202 return x;
1205 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1207 Cell *x;
1208 int pair;
1210 pair = ptoi(a[3]);
1211 if (pairstack[pair] == 0) {
1212 x = execute(a[0]);
1213 if (istrue(x))
1214 pairstack[pair] = 1;
1215 tempfree(x);
1217 if (pairstack[pair] == 1) {
1218 x = execute(a[1]);
1219 if (istrue(x))
1220 pairstack[pair] = 0;
1221 tempfree(x);
1222 x = execute(a[2]);
1223 return(x);
1225 return(False);
/*
 * split: split(a[0], a[1], a[2]); a[3] is type.
 *
 * Splits the string value of a[0] into elements "1", "2", ... of the array
 * a[1], which is emptied first, and returns a numeric temp holding the
 * element count.  The separator is FS when a[2] is 0, the string value of
 * a[2] when a[3] says STRING, or the precompiled automaton in a[2] when a[3]
 * says REGEXPR.  Four strategies are used: regular expression (separator
 * longer than one character or REGEXPR), blank-separated (separator ' '),
 * one character per element (empty separator), and single-character
 * separator.  Pieces that pass is_number() are stored as STR|NUM.
 * The source string is temporarily NUL-terminated in place while each piece
 * is stored, then restored.
 */
1228 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1230 Cell *x = 0, *y, *ap;
1231 char *s;
1232 int sep;
1233 char *t, temp, num[50], *fs = 0;
1234 int n, tempstat, arg3type;
1236 y = execute(a[0]); /* source string */
1237 s = getsval(y);
1238 arg3type = ptoi(a[3]);
1239 if (a[2] == 0) /* fs string */
1240 fs = *FS;
1241 else if (arg3type == STRING) { /* split(str,arr,"string") */
1242 x = execute(a[2]);
1243 fs = getsval(x);
1244 } else if (arg3type == REGEXPR)
1245 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1246 else
1247 FATAL("illegal type of split");
1248 sep = *fs;
/* discard the array's previous contents and give it a fresh table */
1249 ap = execute(a[1]); /* array name */
1250 freesymtab(ap);
1251 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1252 ap->tval &= ~STR;
1253 ap->tval |= ARR;
1254 ap->sval = (char *) makesymtab(NSYMTAB);
1256 n = 0;
1257 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1258 fa *pfa;
1259 if (arg3type == REGEXPR) { /* it's ready already */
1260 pfa = (fa *) a[2];
1261 } else {
1262 pfa = makedfa(fs, 1);
1264 if (nematch(pfa,s)) {
/* initstat is forced to 2 while scanning and restored on every exit path */
1265 tempstat = pfa->initstat;
1266 pfa->initstat = 2;
1267 do {
1268 n++;
1269 snprintf(num, sizeof(num), "%d", n);
1270 temp = *patbeg;
1271 *patbeg = '\0';
1272 if (is_number(s))
1273 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1274 else
1275 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1276 *patbeg = temp;
1277 s = patbeg + patlen;
/* separator reached the end of the string: add a final empty element */
1278 if (*(patbeg+patlen-1) == 0 || *s == 0) {
1279 n++;
1280 snprintf(num, sizeof(num), "%d", n);
1281 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1282 pfa->initstat = tempstat;
1283 goto spdone;
1285 } while (nematch(pfa,s));
1286 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1287 /* cf gsub and refldbld */
/* whatever follows the last separator is the final element */
1289 n++;
1290 snprintf(num, sizeof(num), "%d", n);
1291 if (is_number(s))
1292 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1293 else
1294 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1295 spdone:
1296 pfa = NULL;
/* separator ' ': runs of blanks, tabs and newlines delimit the elements */
1297 } else if (sep == ' ') {
1298 for (n = 0; ; ) {
1299 while (*s == ' ' || *s == '\t' || *s == '\n')
1300 s++;
1301 if (*s == 0)
1302 break;
1303 n++;
1304 t = s;
/* NOTE(review): the `do` that opens this do/while is missing from the listing */
1306 s++;
1307 while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
1308 temp = *s;
1309 *s = '\0';
1310 snprintf(num, sizeof(num), "%d", n);
1311 if (is_number(t))
1312 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1313 else
1314 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1315 *s = temp;
1316 if (*s != 0)
1317 s++;
1319 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1320 for (n = 0; *s != 0; s++) {
1321 char buf[2];
1322 n++;
1323 snprintf(num, sizeof(num), "%d", n);
1324 buf[0] = *s;
1325 buf[1] = 0;
1326 if (isdigit((uschar)buf[0]))
1327 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1328 else
1329 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
/* single-character separator; a newline also ends an element */
1331 } else if (*s != 0) {
1332 for (;;) {
1333 n++;
1334 t = s;
1335 while (*s != sep && *s != '\n' && *s != '\0')
1336 s++;
1337 temp = *s;
1338 *s = '\0';
1339 snprintf(num, sizeof(num), "%d", n);
1340 if (is_number(t))
1341 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1342 else
1343 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1344 *s = temp;
1345 if (*s++ == 0)
1346 break;
1349 tempfree(ap);
1350 tempfree(y);
/* x was only produced by execute() in the STRING case above */
1351 if (a[2] != 0 && arg3type == STRING) {
1352 tempfree(x);
1354 x = gettemp();
1355 x->tval = NUM;
1356 x->fval = n;
1357 return(x);
1360 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1362 Cell *x;
1364 x = execute(a[0]);
1365 if (istrue(x)) {
1366 tempfree(x);
1367 x = execute(a[1]);
1368 } else {
1369 tempfree(x);
1370 x = execute(a[2]);
1372 return(x);
1375 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1377 Cell *x;
1379 x = execute(a[0]);
1380 if (istrue(x)) {
1381 tempfree(x);
1382 x = execute(a[1]);
1383 } else if (a[2] != 0) {
1384 tempfree(x);
1385 x = execute(a[2]);
1387 return(x);
1390 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1392 Cell *x;
1394 for (;;) {
1395 x = execute(a[0]);
1396 if (!istrue(x))
1397 return(x);
1398 tempfree(x);
1399 x = execute(a[1]);
1400 if (isbreak(x)) {
1401 x = True;
1402 return(x);
1404 if (isnext(x) || isexit(x) || isret(x))
1405 return(x);
1406 tempfree(x);
1410 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1412 Cell *x;
1414 for (;;) {
1415 x = execute(a[0]);
1416 if (isbreak(x))
1417 return True;
1418 if (isnext(x) || isexit(x) || isret(x))
1419 return(x);
1420 tempfree(x);
1421 x = execute(a[1]);
1422 if (!istrue(x))
1423 return(x);
1424 tempfree(x);
1428 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1430 Cell *x;
1432 x = execute(a[0]);
1433 tempfree(x);
1434 for (;;) {
1435 if (a[1]!=0) {
1436 x = execute(a[1]);
1437 if (!istrue(x)) return(x);
1438 else tempfree(x);
1440 x = execute(a[3]);
1441 if (isbreak(x)) /* turn off break */
1442 return True;
1443 if (isnext(x) || isexit(x) || isret(x))
1444 return(x);
1445 tempfree(x);
1446 x = execute(a[2]);
1447 tempfree(x);
1451 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1453 Cell *x, *vp, *arrayp, *cp, *ncp;
1454 Array *tp;
1455 int i;
1457 vp = execute(a[0]);
1458 arrayp = execute(a[1]);
1459 if (!isarr(arrayp)) {
1460 return True;
1462 tp = (Array *) arrayp->sval;
1463 tempfree(arrayp);
1464 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1465 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1466 setsval(vp, cp->nval);
1467 ncp = cp->cnext;
1468 x = execute(a[2]);
1469 if (isbreak(x)) {
1470 tempfree(vp);
1471 return True;
1473 if (isnext(x) || isexit(x) || isret(x)) {
1474 tempfree(vp);
1475 return(x);
1477 tempfree(x);
1480 return True;
1483 void flush_all(void);
1485 static char *nawk_toXXX(const char *s,
1486 int (*fun_c)(int),
1487 wint_t (*fun_wc)(wint_t))
1489 char *buf = NULL;
1490 char *pbuf = NULL;
1491 const char *ps = NULL;
1492 size_t n = 0;
1493 mbstate_t mbs, mbs2;
1494 wchar_t wc;
1495 size_t sz = MB_CUR_MAX;
1497 if (sz == 1) {
1498 buf = tostring(s);
1500 for (pbuf = buf; *pbuf; pbuf++)
1501 *pbuf = fun_c((uschar)*pbuf);
1503 return buf;
1504 } else {
1505 /* upper/lower character may be shorter/longer */
1506 buf = tostringN(s, strlen(s) * sz + 1);
1508 memset(&mbs, 0, sizeof(mbs));
1509 memset(&mbs2, 0, sizeof(mbs2));
1511 ps = s;
1512 pbuf = buf;
1513 while (n = mbrtowc(&wc, ps, sz, &mbs),
1514 n > 0 && n != (size_t)-1 && n != (size_t)-2)
1516 ps += n;
1518 n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1519 if (n == (size_t)-1)
1520 FATAL("illegal wide character %s", s);
1522 pbuf += n;
1525 *pbuf = 0;
1527 if (n)
1528 FATAL("illegal byte sequence %s", s);
1530 return buf;
/* nawk_toupper - newly allocated upper-case copy of s (see nawk_toXXX). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_toXXX(s, toupper, towupper);

	return upper;
}
/* nawk_tolower - newly allocated lower-case copy of s (see nawk_toXXX). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_toXXX(s, tolower, towlower);

	return lower;
}
1544 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
1546 Cell *x, *y;
1547 Awkfloat u;
1548 int t, sz;
1549 char *buf, *fmt;
1550 Node *nextarg;
1551 FILE *fp;
1552 time_t tv;
1553 struct tm *tm;
1555 t = ptoi(a[0]);
1556 x = execute(a[1]);
1557 nextarg = a[1]->nnext;
1558 switch (t) {
1559 case FLENGTH:
1560 if (isarr(x))
1561 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
1562 else
1563 u = strlen(getsval(x));
1564 break;
1565 case FLOG:
1566 u = errcheck(log(getfval(x)), "log"); break;
1567 case FINT:
1568 modf(getfval(x), &u); break;
1569 case FEXP:
1570 u = errcheck(exp(getfval(x)), "exp"); break;
1571 case FSQRT:
1572 u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1573 case FSIN:
1574 u = sin(getfval(x)); break;
1575 case FCOS:
1576 u = cos(getfval(x)); break;
1577 case FATAN:
1578 if (nextarg == 0) {
1579 WARNING("atan2 requires two arguments; returning 1.0");
1580 u = 1.0;
1581 } else {
1582 y = execute(a[1]->nnext);
1583 u = atan2(getfval(x), getfval(y));
1584 tempfree(y);
1585 nextarg = nextarg->nnext;
1587 break;
1588 case FSYSTEM:
1589 fflush(stdout); /* in case something is buffered already */
1590 u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */
1591 break;
1592 case FRAND:
1593 /* in principle, rand() returns something in 0..RAND_MAX */
1594 u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
1595 break;
1596 case FSRAND:
1597 if (isrec(x)) /* no argument provided */
1598 u = time((time_t *)0);
1599 else
1600 u = getfval(x);
1601 srand((unsigned int) u);
1602 break;
1603 case FTOUPPER:
1604 case FTOLOWER:
1605 if (t == FTOUPPER)
1606 buf = nawk_toupper(getsval(x));
1607 else
1608 buf = nawk_tolower(getsval(x));
1609 tempfree(x);
1610 x = gettemp();
1611 setsval(x, buf);
1612 free(buf);
1613 return x;
1614 case FFLUSH:
1615 if (isrec(x) || strlen(getsval(x)) == 0) {
1616 flush_all(); /* fflush() or fflush("") -> all */
1617 u = 0;
1618 } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
1619 u = -1;
1620 else
1621 u = fflush(fp);
1622 break;
1623 case FSYSTIME:
1624 u = time((time_t *) 0); break;
1625 case FSTRFTIME:
1626 /* strftime([format [,timestamp]]) */
1627 if (nextarg) {
1628 y = execute(nextarg), nextarg = nextarg->nnext;
1629 tv = (time_t) getfval(y);
1630 tempfree(y);
1631 } else
1632 tv = time((time_t *) 0);
1633 tm = localtime(&tv);
1635 if (isrec(x)) {
1636 /* format argument not provided, use default */
1637 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1638 } else
1639 fmt = tostring(getsval(x));
1641 sz = 32, buf = NULL;
1642 do {
1643 if ((buf = realloc(buf, (sz *= 2))) == NULL)
1644 FATAL("out of memory in strftime");
1645 } while(strftime(buf, sz, fmt, tm) == 0);
1647 y = gettemp();
1648 setsval(y, buf);
1649 free(fmt);
1650 free(buf);
1652 return y;
1653 default: /* can't happen */
1654 FATAL("illegal function type %d", t);
1655 break;
1657 tempfree(x);
1658 x = gettemp();
1659 setfval(x, u);
1660 if (nextarg != 0) {
1661 WARNING("warning: function has too many arguments");
1662 for ( ; nextarg; nextarg = nextarg->nnext)
1663 execute(nextarg);
1665 return(x);
1668 Cell *printstat(Node **a, int n) /* print a[0] */
1670 Node *x;
1671 Cell *y;
1672 FILE *fp;
1674 if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */
1675 fp = stdout;
1676 else
1677 fp = redirect(ptoi(a[1]), a[2]);
1678 for (x = a[0]; x != NULL; x = x->nnext) {
1679 y = execute(x);
1680 fputs(getpssval(y), fp);
1681 tempfree(y);
1682 if (x->nnext == NULL)
1683 fputs(*ORS, fp);
1684 else
1685 fputs(*OFS, fp);
1687 if (a[1] != 0)
1688 fflush(fp);
1689 if (ferror(fp))
1690 FATAL("write error on %s", filename(fp));
1691 return(True);
1694 Cell *nullproc(Node **a, int n)
1696 n = n;
1697 a = a;
1698 return 0;
1702 FILE *redirect(int a, Node *b) /* set up all i/o redirections */
1704 FILE *fp;
1705 Cell *x;
1706 char *fname;
1708 x = execute(b);
1709 fname = getsval(x);
1710 fp = openfile(a, fname);
1711 if (fp == NULL)
1712 FATAL("can't open file %s", fname);
1713 tempfree(x);
1714 return fp;
/*
 * One entry of the open-stream table.  openfile() treats an entry whose
 * fp is NULL as free; closefile() resets fname and fp to NULL.
 */
struct files {
	FILE		*fp;	/* the stream */
	const char	*fname;	/* name it was opened under */
	int		mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself, allocated by stdinit() */
size_t nfiles;		/* number of entries in files[] */
1724 void stdinit(void) /* in case stdin, etc., are not constants */
1726 nfiles = FOPEN_MAX;
1727 files = calloc(nfiles, sizeof(*files));
1728 if (files == NULL)
1729 FATAL("can't allocate file memory for %zu files", nfiles);
1730 files[0].fp = stdin;
1731 files[0].fname = "/dev/stdin";
1732 files[0].mode = LT;
1733 files[1].fp = stdout;
1734 files[1].fname = "/dev/stdout";
1735 files[1].mode = GT;
1736 files[2].fp = stderr;
1737 files[2].fname = "/dev/stderr";
1738 files[2].mode = GT;
1741 FILE *openfile(int a, const char *us)
1743 const char *s = us;
1744 size_t i;
1745 int m;
1746 FILE *fp = 0;
1748 if (*s == '\0')
1749 FATAL("null file name in print or get_line");
1750 for (i = 0; i < nfiles; i++)
1751 if (files[i].fname && strcmp(s, files[i].fname) == 0) {
1752 if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
1753 return files[i].fp;
1754 if (a == FFLUSH)
1755 return files[i].fp;
1757 if (a == FFLUSH) /* didn't find it, so don't create it! */
1758 return NULL;
1760 for (i = 0; i < nfiles; i++)
1761 if (files[i].fp == NULL)
1762 break;
1763 if (i >= nfiles) {
1764 struct files *nf;
1765 size_t nnf = nfiles + FOPEN_MAX;
1766 nf = realloc(files, nnf * sizeof(*nf));
1767 if (nf == NULL)
1768 FATAL("cannot grow files for %s and %zu files", s, nnf);
1769 (void)memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1770 nfiles = nnf;
1771 files = nf;
1773 fflush(stdout); /* force a semblance of order */
1774 m = a;
1775 if (a == GT) {
1776 fp = fopen(s, "w");
1777 } else if (a == APPEND) {
1778 fp = fopen(s, "a");
1779 m = GT; /* so can mix > and >> */
1780 } else if (a == '|') { /* output pipe */
1781 fp = popen(s, "w");
1782 } else if (a == LE) { /* input pipe */
1783 fp = popen(s, "r");
1784 } else if (a == LT) { /* get_line <file */
1785 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
1786 } else /* can't happen */
1787 FATAL("illegal redirection %d", a);
1788 if (fp != NULL) {
1789 files[i].fname = tostring(s);
1790 files[i].fp = fp;
1791 files[i].mode = m;
1793 return fp;
1796 const char *filename(FILE *fp)
1798 size_t i;
1800 for (i = 0; i < nfiles; i++)
1801 if (fp == files[i].fp)
1802 return files[i].fname;
1803 return "???";
1806 Cell *closefile(Node **a, int n)
1808 Cell *x;
1809 size_t i;
1810 int stat;
1812 n = n;
1813 x = execute(a[0]);
1814 getsval(x);
1815 stat = -1;
1816 for (i = 0; i < nfiles; i++) {
1817 if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
1818 if (ferror(files[i].fp))
1819 WARNING( "i/o error occurred on %s", files[i].fname );
1820 if (files[i].mode == '|' || files[i].mode == LE)
1821 stat = pclose(files[i].fp) == -1;
1822 else
1823 stat = fclose(files[i].fp) == EOF;
1824 if (stat) {
1825 stat = -1;
1826 WARNING( "i/o error occurred closing %s",
1827 files[i].fname );
1829 if (i > 2) /* don't do /dev/std... */
1830 xfree(files[i].fname);
1831 files[i].fname = NULL; /* watch out for ref thru this */
1832 files[i].fp = NULL;
1835 tempfree(x);
1836 x = gettemp();
1837 setfval(x, (Awkfloat) stat);
1838 return(x);
1841 void closeall(void)
1843 size_t i;
1844 int stat;
1846 for (i = 0; i < nfiles; i++) {
1847 if (files[i].fp) {
1848 if (ferror(files[i].fp))
1849 WARNING( "i/o error occurred on %s", files[i].fname );
1850 if (i == 0)
1851 stat = fpurge(files[i].fp) == EOF;
1852 else if (i <= 2)
1853 stat = fflush(files[i].fp) == EOF;
1854 else if (files[i].mode == '|' || files[i].mode == LE)
1855 stat = pclose(files[i].fp) == -1;
1856 else
1857 stat = fclose(files[i].fp) == EOF;
1858 if (stat)
1859 WARNING( "i/o error occurred while closing %s", files[i].fname );
1864 void flush_all(void)
1866 size_t i;
1868 for (i = 0; i < nfiles; i++)
1869 if (files[i].fp)
1870 fflush(files[i].fp);
1873 void backsub(uschar **pb_ptr, const uschar **sptr_ptr);
1875 Cell *sub(Node **a, int nnn) /* substitute command */
1877 const uschar *sptr;
1878 uschar *q;
1879 Cell *x, *y, *result;
1880 uschar *t, *buf, *pb;
1881 fa *pfa;
1882 int bufsz = recsize;
1884 if ((buf = (char *) malloc(bufsz)) == NULL)
1885 FATAL("out of memory in sub");
1886 x = execute(a[3]); /* target string */
1887 t = getsval(x);
1888 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1889 pfa = (fa *) a[1]; /* regular expression */
1890 else {
1891 y = execute(a[1]);
1892 pfa = makedfa(getsval(y), 1);
1893 tempfree(y);
1895 y = execute(a[2]); /* replacement string */
1896 result = False;
1897 if (pmatch(pfa, t)) {
1898 sptr = t;
1899 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1900 pb = buf;
1901 while (sptr < patbeg)
1902 *pb++ = *sptr++;
1903 sptr = getsval(y);
1904 while (*sptr != 0) {
1905 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1906 if (*sptr == '\\') {
1907 backsub(&pb, &sptr);
1908 } else if (*sptr == '&') {
1909 sptr++;
1910 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1911 for (q = patbeg; q < patbeg+patlen; )
1912 *pb++ = *q++;
1913 } else
1914 *pb++ = *sptr++;
1916 *pb = '\0';
1917 if (pb > buf + bufsz)
1918 FATAL("sub result1 %.30s too big; can't happen", buf);
1919 sptr = patbeg + patlen;
1920 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1921 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1922 while ((*pb++ = *sptr++) != 0)
1925 if (pb > buf + bufsz)
1926 FATAL("sub result2 %.30s too big; can't happen", buf);
1927 setsval(x, buf); /* BUG: should be able to avoid copy */
1928 result = True;;
1930 tempfree(x);
1931 tempfree(y);
1932 free(buf);
1933 return result;
1936 Cell *gsub(Node **a, int nnn) /* global substitute */
1938 Cell *x, *y;
1939 const char *rptr;
1940 const uschar *sptr;
1941 uschar *t, *q;
1942 uschar *pb, *buf;
1943 fa *pfa;
1944 int mflag, tempstat, num;
1945 int bufsz = recsize;
1947 if ((buf = (char *) malloc(bufsz)) == NULL)
1948 FATAL("out of memory in gsub");
1949 mflag = 0; /* if mflag == 0, can replace empty string */
1950 num = 0;
1951 x = execute(a[3]); /* target string */
1952 t = getsval(x);
1953 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
1954 pfa = (fa *) a[1]; /* regular expression */
1955 else {
1956 y = execute(a[1]);
1957 pfa = makedfa(getsval(y), 1);
1958 tempfree(y);
1960 y = execute(a[2]); /* replacement string */
1961 if (pmatch(pfa, t)) {
1962 tempstat = pfa->initstat;
1963 pfa->initstat = 2;
1964 pb = buf;
1965 rptr = getsval(y);
1966 do {
1967 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
1968 if (mflag == 0) { /* can replace empty */
1969 num++;
1970 sptr = rptr;
1971 while (*sptr != 0) {
1972 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1973 if (*sptr == '\\') {
1974 backsub(&pb, &sptr);
1975 } else if (*sptr == '&') {
1976 sptr++;
1977 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1978 for (q = patbeg; q < patbeg+patlen; )
1979 *pb++ = *q++;
1980 } else
1981 *pb++ = *sptr++;
1984 if (*t == 0) /* at end */
1985 goto done;
1986 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
1987 *pb++ = *t++;
1988 if (pb > buf + bufsz) /* BUG: not sure of this test */
1989 FATAL("gsub result0 %.30s too big; can't happen", buf);
1990 mflag = 0;
1992 else { /* matched nonempty string */
1993 num++;
1994 sptr = t;
1995 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
1996 while (sptr < patbeg)
1997 *pb++ = *sptr++;
1998 sptr = rptr;
1999 while (*sptr != 0) {
2000 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2001 if (*sptr == '\\') {
2002 backsub(&pb, &sptr);
2003 } else if (*sptr == '&') {
2004 sptr++;
2005 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2006 for (q = patbeg; q < patbeg+patlen; )
2007 *pb++ = *q++;
2008 } else
2009 *pb++ = *sptr++;
2011 t = patbeg + patlen;
2012 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2013 goto done;
2014 if (pb > buf + bufsz)
2015 FATAL("gsub result1 %.30s too big; can't happen", buf);
2016 mflag = 1;
2018 } while (pmatch(pfa,t));
2019 sptr = t;
2020 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2021 while ((*pb++ = *sptr++) != 0)
2023 done: if (pb > buf + bufsz)
2024 FATAL("gsub result2 %.30s too big; can't happen", buf);
2025 *pb = '\0';
2026 setsval(x, buf); /* BUG: should be able to avoid copy + free */
2027 pfa->initstat = tempstat;
2029 tempfree(x);
2030 tempfree(y);
2031 x = gettemp();
2032 x->tval = NUM;
2033 x->fval = num;
2034 free(buf);
2035 return(x);
2038 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2039 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2041 Cell *x, *y, *res, *h;
2042 char *rptr;
2043 const uschar *sptr;
2044 uschar *q, *pb, *t, *buf;
2045 fa *pfa;
2046 int mflag, tempstat, num, whichm;
2047 int bufsz = recsize;
2049 if ((buf = (char *) malloc(bufsz)) == NULL)
2050 FATAL("out of memory in gensub");
2051 mflag = 0; /* if mflag == 0, can replace empty string */
2052 num = 0;
2053 x = execute(a[4]); /* source string */
2054 t = getsval(x);
2055 res = copycell(x); /* target string - initially copy of source */
2056 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2057 pfa = (fa *) a[1]; /* regular expression */
2058 else {
2059 y = execute(a[1]);
2060 pfa = makedfa(getsval(y), 1);
2061 tempfree(y);
2063 y = execute(a[2]); /* replacement string */
2064 h = execute(a[3]); /* which matches should be replaced */
2065 sptr = getsval(h);
2066 if (sptr[0] == 'g' || sptr[0] == 'G')
2067 whichm = -1;
2068 else {
2070 * The specified number is index of replacement, starting
2071 * from 1. GNU awk treats index lower than 0 same as
2072 * 1, we do same for compatibility.
2074 whichm = (int) getfval(h) - 1;
2075 if (whichm < 0)
2076 whichm = 0;
2078 tempfree(h);
2080 if (pmatch(pfa, t)) {
2081 char *sl;
2083 tempstat = pfa->initstat;
2084 pfa->initstat = 2;
2085 pb = buf;
2086 rptr = getsval(y);
2088 * XXX if there are any backreferences in subst string,
2089 * complain now.
2091 for(sl=rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2092 if (strchr("0123456789", sl[1])) {
2093 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2097 do {
2098 if (whichm >= 0 && whichm != num) {
2099 num++;
2100 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2102 /* copy the part of string up to and including
2103 * match to output buffer */
2104 while (t < patbeg + patlen)
2105 *pb++ = *t++;
2106 continue;
2109 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2110 if (mflag == 0) { /* can replace empty */
2111 num++;
2112 sptr = rptr;
2113 while (*sptr != 0) {
2114 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2115 if (*sptr == '\\') {
2116 backsub(&pb, &sptr);
2117 } else if (*sptr == '&') {
2118 sptr++;
2119 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2120 for (q = patbeg; q < patbeg+patlen; )
2121 *pb++ = *q++;
2122 } else
2123 *pb++ = *sptr++;
2126 if (*t == 0) /* at end */
2127 goto done;
2128 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2129 *pb++ = *t++;
2130 if (pb > buf + bufsz) /* BUG: not sure of this test */
2131 FATAL("gensub result0 %.30s too big; can't happen", buf);
2132 mflag = 0;
2134 else { /* matched nonempty string */
2135 num++;
2136 sptr = t;
2137 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2138 while (sptr < patbeg)
2139 *pb++ = *sptr++;
2140 sptr = rptr;
2141 while (*sptr != 0) {
2142 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2143 if (*sptr == '\\') {
2144 backsub(&pb, &sptr);
2145 } else if (*sptr == '&') {
2146 sptr++;
2147 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2148 for (q = patbeg; q < patbeg+patlen; )
2149 *pb++ = *q++;
2150 } else
2151 *pb++ = *sptr++;
2153 t = patbeg + patlen;
2154 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2155 goto done;
2156 if (pb > buf + bufsz)
2157 FATAL("gensub result1 %.30s too big; can't happen", buf);
2158 mflag = 1;
2160 } while (pmatch(pfa,t));
2161 sptr = t;
2162 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2163 while ((*pb++ = *sptr++) != 0)
2165 done: if (pb > buf + bufsz)
2166 FATAL("gensub result2 %.30s too big; can't happen", buf);
2167 *pb = '\0';
2168 setsval(res, buf);
2169 pfa->initstat = tempstat;
2171 tempfree(x);
2172 tempfree(y);
2173 free(buf);
2174 return(res);
2177 void backsub(uschar **pb_ptr, const uschar **sptr_ptr)/* handle \\& variations */
2178 { /* sptr[0] == '\\' */
2179 uschar *pb = *pb_ptr;
2180 const uschar *sptr = *sptr_ptr;
2182 if (sptr[1] == '\\') {
2183 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2184 *pb++ = '\\';
2185 *pb++ = '&';
2186 sptr += 4;
2187 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2188 *pb++ = '\\';
2189 sptr += 2;
2190 } else { /* \\x -> \\x */
2191 *pb++ = *sptr++;
2192 *pb++ = *sptr++;
2194 } else if (sptr[1] == '&') { /* literal & */
2195 sptr++;
2196 *pb++ = *sptr++;
2197 } else /* literal \ */
2198 *pb++ = *sptr++;
2200 *pb_ptr = pb;
2201 *sptr_ptr = sptr;