Tomato 1.25
[tomato.git] / release / src / router / busybox / editors / awk.c
blob89ce2cfc81e9d39045c92067c1ca911ca8d06a72
1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8 */
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
14 /* This is a NOEXEC applet. Be very careful! */
/* Buffer limit passed to fmt_num() when getvar_s() renders a number as text */
17 #define MAXVARFMT 240
/* Smallest number of var slots nvalloc() puts into a newly allocated block */
18 #define MINNVBLOCK 64
20 /* variable flags */
/* Bit flags stored in var.type. clrvar() wipes every bit that is not part
 * of VF_DONTTOUCH and then sets VF_DIRTY. */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
35 /* Variable */
/* One awk value: a flag word plus a numeric and a string representation.
 * getvar_s() and getvar_i() compute the missing representation on demand
 * and record that with VF_CACHED. */
36 typedef struct var_s {
37 unsigned type; /* flags */
38 double number;
39 char *string;
/* Which member of x is live is indicated by flags in 'type':
 * VF_ARRAY -> array (iamarray), VF_CHILD -> parent (iamarray),
 * VF_WALK -> walker (nvfree). */
40 union {
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
45 } x;
46 } var;
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* chain_node() appends to the chain through 'last' and creates the first
 * node lazily when 'first' is NULL. */
49 typedef struct chain_s {
50 struct node_s *first;
51 struct node_s *last;
/* chain_node() compares this pointer with g_progname and emits an
 * OC_NEWSOURCE node whenever they differ */
52 const char *programname;
53 } chain;
55 /* Function */
/* Entry stored in the functions hash (see hash_item.data.f). */
56 typedef struct func_s {
/* NOTE(review): presumably the declared parameter count; it is set outside this view */
57 unsigned nargs;
58 struct chain_s body;
59 } func;
61 /* I/O stream */
/* Entry stored in the redirect-streams hash (see hash_item.data.rs).
 * NOTE(review): the buffer/adv/size/pos members are not used in the part of
 * the file visible here; presumably they describe the read buffer. Confirm
 * against the record-reading code. */
62 typedef struct rstream_s {
63 FILE *F;
64 char *buffer;
65 int adv;
66 int size;
67 int pos;
68 smallint is_pipe;
69 } rstream;
71 typedef struct hash_item_s {
72 union {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
76 } data;
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
79 } hash_item;
/* Chained hash table. hash_init() starts with FIRST_PRIME buckets and
 * hash_rebuild() grows the bucket array to the next entry of PRIMES[]. */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
/* hash_find() adds strlen(name) + 1 per inserted key, hash_remove() subtracts it */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
87 } xhash;
89 /* Tree node */
/* Used both for expression trees built by parse_expr() and for the
 * statement chains built by chain_node(). */
90 typedef struct node_s {
/* token/operation info word: operation class (OPCLSMASK), priority byte
 * (see P()), OF_* flags and low operand bits */
91 uint32_t info;
/* new_node() copies g_lineno here */
92 unsigned lineno;
93 union {
94 struct node_s *n;
95 var *v;
96 int i;
97 char *s;
98 regex_t *re;
99 } l;
100 union {
101 struct node_s *n;
102 regex_t *ire;
103 func *f;
104 int argno;
105 } r;
/* parse_expr() stores the parent node here; chain_node() stores the
 * successor in the chain */
106 union {
107 struct node_s *n;
108 } a;
109 } node;
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
113 int size;
114 var *pos;
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
117 var nv[0];
118 } nvblock;
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably handed to mk_re_node(), which compiles the pattern
 * into re[0] (REG_EXTENDED) and re[1] (REG_EXTENDED | REG_ICASE); the call
 * sites are outside this view. */
120 typedef struct tsplitter_s {
121 node n;
122 regex_t re[2];
123 } tsplitter;
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
/* next_token() starts with class bit 1 and shifts it left each time it
 * meets an NTC marker in tokenlist[], so the classes from TC_SEQSTART to
 * TC_END must appear here in the same order as the groups of tokenlist[]. */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
/* the classes below are assigned directly by next_token(), not via tokenlist[] */
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
/* Unions of the simple class bits; used as the 'expected' mask given to
 * next_token() and for classifying the token it returns. */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
/* next_token() always tries to match these, even when they are not in the
 * 'expected' mask, and rejects a match that is followed by [A-Za-z0-9_] */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
/* Operand-handling flags OR-ed into a node's info word (bits 16..21).
 * NOTE(review): the evaluator that interprets them is outside this view;
 * judging by the names and by the builtin comment next to P(), RES1/RES2
 * presumably request evaluation of the left/right operand, STR1/STR2 a
 * string value and NUM1 a numeric value. Confirm in evaluate(). */
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
/* Two-letter shorthands used in tokeninfo[]: first letter describes the
 * left operand, second the right one (x = none, V = RES, S = RES|STR,
 * N = RES|NUM). */
196 #define xx 0
197 #define xV OF_RES2
198 #define xS (OF_RES2 | OF_STR2)
199 #define Vx OF_RES1
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
/* Masks for the fields of an info word: operation class and low operand bits */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* The argument is parenthesised and widened to uint32_t before shifting so
 * that values with the top bit set, e.g. P(0x83), do not overflow a signed
 * int. */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
/* Operation classes */

/* NOTE(review): these two thresholds equal OC_WALKINIT and OC_BINARY; how the
 * evaluator uses them is outside this view. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

/* Operation class of a node; occupies the OPCLSMASK byte of the info word.
 * The ST_* values label control statements while parsing (see chain_group). */
enum {
	OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

	OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
	OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
	OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
	OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
/* simple builtins */
/* Sub-operation numbers stored in the low bits of OC_FBLTIN entries of
 * tokeninfo[]. */
enum {
	F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
	F_ti,   F_le,   F_sy,   F_ff,   F_cl
};
/* builtins */
/* Sub-operation numbers stored in the low bits of OC_BUILTIN (OC_B) entries
 * of tokeninfo[]. */
enum {
	B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
	B_ge,   B_gs,   B_su,
	B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define NTCC '\377'
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
266 "\1(" NTC
267 "\1)" NTC
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
279 "\2in" NTC
280 "\1," NTC
281 "\1|" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
283 "\1]" NTC
284 "\1{" NTC
285 "\1}" NTC
286 "\1;" NTC
287 "\1\n" NTC
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
292 "\5while" NTC
293 "\4else" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
296 "\6rshift" "\3xor"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
304 "\7getline" NTC
305 "\4func" "\10function" NTC
306 "\5BEGIN" NTC
307 "\3END" "\0"
310 static const uint32_t tokeninfo[] = {
313 OC_REGEXP,
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_FIELD|xV|P(5),
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
332 OC_IN|SV|P(49),
333 OC_COMMA|SS|P(80),
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
346 ST_WHILE,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358 OC_GETLINE|SV|P(0),
359 0, 0,
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order matches the names listed in vNames[]. */
enum {
	CONVFMT,    OFMT,       FS,         OFS,
	ORS,        RS,         RT,         FILENAME,
	SUBSEP,     F0,         ARGIND,     ARGC,
	ARGV,       ERRNO,      FNR,        NR,
	NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};
/* Names of the internal variables, each NUL-terminated, in the order of the
 * CONVFMT..ENVIRON enum; an empty string ends the list. A '*' appears after
 * the terminator of FS, RS, $, NF and IGNORECASE.
 * NOTE(review): presumably that '*' is the SPECIAL marker the enum comment
 * mentions; the code that consumes this table is outside this view. */
374 static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
/* Initial string values for the leading internal variables, NUL-separated,
 * followed by a \377 byte.
 * NOTE(review): presumably \377 tells the initialiser that the remaining
 * variables have no string default; confirm in the init code. */
381 static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
386 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets */
387 #define FIRST_PRIME 61
/* hash_rebuild() takes PRIMES[nprime++] as the new bucket count and stops
 * growing once nprime reaches ARRAY_SIZE(PRIMES) */
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
391 /* Globals. Split in two parts so that first one is addressed
392 * with (mostly short) negative offsets.
393 * NB: it's unsafe to put members of type "double"
394 * into globals2 (gcc may fail to align them).
396 struct globals {
397 double t_double;
398 chain beginseq, mainseq, endseq;
399 chain *seq;
400 node *break_ptr, *continue_ptr;
401 rstream *iF;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
404 int g_lineno;
405 int nfields;
406 int maxfields; /* used in fsrealloc() only */
407 var *Fields;
408 nvblock *g_cb;
409 char *g_pos;
410 char *g_buf;
411 smallint icase;
412 smallint exiting;
413 smallint nextrec;
414 smallint nextfile;
415 smallint is_f0_split;
417 struct globals2 {
418 uint32_t t_info; /* often used */
419 uint32_t t_tclass;
420 char *t_string;
421 int t_lineno;
422 int t_rollback;
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
441 var ptest__v;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
/* INIT_G() allocates struct globals and struct globals2 back to back and
 * stores the address just past the first one via SET_PTR_TO_GLOBALS. G1
 * therefore names the struct globals sitting immediately before that address
 * and G the struct globals2 that starts at it. */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Accessor macros: let the rest of the file use the members of G1 and G as
 * if they were ordinary global variables. */
455 #define t_double (G1.t_double )
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF (G1.iF )
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
/* Zero-allocates both global structs, then sets the two members whose
 * initial value is not zero. */
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
492 } while (0)
495 /* function prototypes */
/* Forward declarations for functions that are called before their
 * definition appears in the file. */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
/* Message texts passed to syntax_error(), which prefixes them with the
 * program name and line number. */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only needed when the math builtins are compiled out */
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
524 static void syntax_error(const char *const message) NORETURN;
525 static void syntax_error(const char *const message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
/* Hash a NUL-terminated key: for each character c, idx = idx * 63 + c,
 * computed in unsigned arithmetic. The empty string hashes to 0.
 * Callers reduce the result modulo the bucket count.
 */
static unsigned hashidx(const char *name)
{
	unsigned idx = 0;

	for (; *name != '\0'; name++)
		idx = *name + (idx << 6) - idx;
	return idx;
}
540 /* create new hash */
541 static xhash *hash_init(void)
543 xhash *newhash;
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
549 return newhash;
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
555 hash_item *hi;
557 hi = hash->items [ hashidx(name) % hash->csize ];
558 while (hi) {
559 if (strcmp(hi->name, name) == 0)
560 return &(hi->data);
561 hi = hi->next;
563 return NULL;
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
573 return;
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
578 for (i = 0; i < hash->csize; i++) {
579 hi = hash->items[i];
580 while (hi) {
581 thi = hi;
582 hi = thi->next;
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
585 newitems[idx] = thi;
589 free(hash->items);
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
597 hash_item *hi;
598 unsigned idx;
599 int l;
601 hi = hash_search(hash, name);
602 if (!hi) {
603 if (++hash->nel / hash->csize > 10)
604 hash_rebuild(hash);
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(*hi) + l);
608 strcpy(hi->name, name);
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
613 hash->glen += l;
615 return &(hi->data);
/* Typed wrappers around hash_find(): look up 'name', creating the entry when
 * it is missing. newvar/newfile/newfunc use the global vhash, fdhash and
 * fnhash tables; findvar takes the table explicitly. */
618 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
619 #define newvar(name) ((var*) hash_find(vhash, (name)))
620 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
627 phi = &(hash->items[hashidx(name) % hash->csize]);
628 while (*phi) {
629 hi = *phi;
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
632 hash->nel--;
633 *phi = hi->next;
634 free(hi);
635 break;
637 phi = &(hi->next);
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
645 char *p = *s;
647 while (1) {
648 if (*p == '\\' && p[1] == '\n') {
649 p++;
650 t_lineno++;
651 } else if (*p != ' ' && *p != '\t') {
652 break;
654 p++;
656 *s = p;
/* Return the NUL-terminated word at *s and advance *s to the byte that
 * follows its terminator. Used to step through "a\0b\0..." string lists. */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return word;
}
/* Fetch one character from *s and advance *s. A backslash is handed to
 * bb_process_escape_sequence(); if that call returns a backslash without
 * consuming anything, the character after the backslash is returned
 * instead. */
static char nextchar(char **s)
{
	char ch = *((*s)++);
	char *after_backslash = *s;

	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after_backslash) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
/* Return non-zero when c is a letter, a digit or '_'.
 * Callers pass plain char values, which may be negative; the value is
 * reduced to unsigned char first because isalnum() is only defined for
 * unsigned char values and EOF. */
static ALWAYS_INLINE int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
/* Convert the number at *pp and advance *pp past the consumed text.
 *
 * With ENABLE_DESKTOP, text that starts with "0x"/"0X" or with '0' followed
 * by a digit is first tried as a C-style hex/octal integer. That result is
 * used only for hex input or when the integer parse consumed the whole
 * number. If a digit or '.' follows (e.g. "09", "00.5"), the text is
 * re-parsed as a decimal floating-point number instead of being cut short.
 */
static double my_strtod(char **pp)
{
#if ENABLE_DESKTOP
	char *cp = *pp;

	if (cp[0] == '0') {
		char c = cp[1] | 0x20;

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			char *end;
			unsigned long long ull = strtoull(cp, &end, 0);

			if (c == 'x'
			 || (!isdigit((unsigned char)*end) && *end != '.')
			) {
				*pp = end;
				return ull;
			}
			/* octal parse stopped early: fall through to strtod */
		}
	}
#endif
	return strtod(*pp, pp);
}
696 /* -------- working with variables (set/get/copy/etc) -------- */
698 static xhash *iamarray(var *v)
700 var *a = v;
702 while (a->type & VF_CHILD)
703 a = a->x.parent;
705 if (!(a->type & VF_ARRAY)) {
706 a->type |= VF_ARRAY;
707 a->x.array = hash_init();
709 return a->x.array;
712 static void clear_array(xhash *array)
714 unsigned i;
715 hash_item *hi, *thi;
717 for (i = 0; i < array->csize; i++) {
718 hi = array->items[i];
719 while (hi) {
720 thi = hi;
721 hi = hi->next;
722 free(thi->data.v.string);
723 free(thi);
725 array->items[i] = NULL;
727 array->glen = array->nel = 0;
730 /* clear a variable */
731 static var *clrvar(var *v)
733 if (!(v->type & VF_FSTR))
734 free(v->string);
736 v->type &= VF_DONTTOUCH;
737 v->type |= VF_DIRTY;
738 v->string = NULL;
739 return v;
742 /* assign string value to variable */
743 static var *setvar_p(var *v, char *value)
745 clrvar(v);
746 v->string = value;
747 handle_special(v);
748 return v;
751 /* same as setvar_p but make a copy of string */
752 static var *setvar_s(var *v, const char *value)
754 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 /* same as setvar_s but set USER flag */
758 static var *setvar_u(var *v, const char *value)
760 setvar_s(v, value);
761 v->type |= VF_USER;
762 return v;
765 /* set array element to user string */
766 static void setari_u(var *a, int idx, const char *s)
768 char sidx[sizeof(int)*3 + 1];
769 var *v;
771 sprintf(sidx, "%d", idx);
772 v = findvar(iamarray(a), sidx);
773 setvar_u(v, s);
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
779 clrvar(v);
780 v->type |= VF_NUMBER;
781 v->number = value;
782 handle_special(v);
783 return v;
786 static const char *getvar_s(var *v)
788 /* if v is numeric and has no cached string, convert it to string */
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791 v->string = xstrdup(g_buf);
792 v->type |= VF_CACHED;
794 return (v->string == NULL) ? "" : v->string;
797 static double getvar_i(var *v)
799 char *s;
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802 v->number = 0;
803 s = v->string;
804 if (s && *s) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
807 skip_spaces(&s);
808 if (*s != '\0')
809 v->type &= ~VF_USER;
811 } else {
812 v->type &= ~VF_USER;
814 v->type |= VF_CACHED;
816 return v->number;
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
822 double d = getvar_i(v);
824 /* Casting doubles to longs is undefined for values outside
825 * of target type range. Try to widen it as much as possible */
826 if (d >= 0)
827 return (unsigned long)d;
828 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829 return - (long) (unsigned long) (-d);
832 static var *copyvar(var *dest, const var *src)
834 if (dest != src) {
835 clrvar(dest);
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
838 if (src->string)
839 dest->string = xstrdup(src->string);
841 handle_special(dest);
842 return dest;
845 static var *incvar(var *v)
847 return setvar_i(v, getvar_i(v) + 1.);
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
853 getvar_i(v);
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
860 if (is_numeric(v))
861 return (v->number == 0) ? 0 : 1;
862 return (v->string && *(v->string)) ? 1 : 0;
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
868 nvblock *pb = NULL;
869 var *v, *r;
870 int size;
872 while (g_cb) {
873 pb = g_cb;
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
875 g_cb = g_cb->next;
878 if (!g_cb) {
879 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
880 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
881 g_cb->size = size;
882 g_cb->pos = g_cb->nv;
883 g_cb->prev = pb;
884 /*g_cb->next = NULL; - xzalloc did it */
885 if (pb) pb->next = g_cb;
888 v = r = g_cb->pos;
889 g_cb->pos += n;
891 while (v < g_cb->pos) {
892 v->type = 0;
893 v->string = NULL;
894 v++;
897 return r;
900 static void nvfree(var *v)
902 var *p;
904 if (v < g_cb->nv || v >= g_cb->pos)
905 syntax_error(EMSG_INTERNAL_ERROR);
907 for (p = v; p < g_cb->pos; p++) {
908 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
909 clear_array(iamarray(p));
910 free(p->x.array->items);
911 free(p->x.array);
913 if (p->type & VF_WALK)
914 free(p->x.walker);
916 clrvar(p);
919 g_cb->pos = v;
920 while (g_cb->prev && g_cb->pos == g_cb->nv) {
921 g_cb = g_cb->prev;
925 /* ------- awk program text parsing ------- */
927 /* Parse next token pointed by global pos, place results into global ttt.
928 * If token isn't expected, give away. Return token class
930 static uint32_t next_token(uint32_t expected)
932 #define concat_inserted (G.next_token__concat_inserted)
933 #define save_tclass (G.next_token__save_tclass)
934 #define save_info (G.next_token__save_info)
935 /* Initialized to TC_OPTERM: */
936 #define ltclass (G.next_token__ltclass)
938 char *p, *pp, *s;
939 const char *tl;
940 uint32_t tc;
941 const uint32_t *ti;
942 int l;
944 if (t_rollback) {
945 t_rollback = FALSE;
947 } else if (concat_inserted) {
948 concat_inserted = FALSE;
949 t_tclass = save_tclass;
950 t_info = save_info;
952 } else {
953 p = g_pos;
954 readnext:
955 skip_spaces(&p);
956 g_lineno = t_lineno;
957 if (*p == '#')
958 while (*p != '\n' && *p != '\0')
959 p++;
961 if (*p == '\n')
962 t_lineno++;
964 if (*p == '\0') {
965 tc = TC_EOF;
967 } else if (*p == '\"') {
968 /* it's a string */
969 t_string = s = ++p;
970 while (*p != '\"') {
971 if (*p == '\0' || *p == '\n')
972 syntax_error(EMSG_UNEXP_EOS);
973 *(s++) = nextchar(&p);
975 p++;
976 *s = '\0';
977 tc = TC_STRING;
979 } else if ((expected & TC_REGEXP) && *p == '/') {
980 /* it's regexp */
981 t_string = s = ++p;
982 while (*p != '/') {
983 if (*p == '\0' || *p == '\n')
984 syntax_error(EMSG_UNEXP_EOS);
985 *s = *p++;
986 if (*s++ == '\\') {
987 pp = p;
988 *(s-1) = bb_process_escape_sequence((const char **)&p);
989 if (*pp == '\\')
990 *s++ = '\\';
991 if (p == pp)
992 *s++ = *p++;
995 p++;
996 *s = '\0';
997 tc = TC_REGEXP;
999 } else if (*p == '.' || isdigit(*p)) {
1000 /* it's a number */
1001 t_double = my_strtod(&p);
1002 if (*p == '.')
1003 syntax_error(EMSG_UNEXP_TOKEN);
1004 tc = TC_NUMBER;
1006 } else {
1007 /* search for something known */
1008 tl = tokenlist;
1009 tc = 0x00000001;
1010 ti = tokeninfo;
1011 while (*tl) {
1012 l = *(tl++);
1013 if (l == NTCC) {
1014 tc <<= 1;
1015 continue;
1017 /* if token class is expected, token
1018 * matches and it's not a longer word,
1019 * then this is what we are looking for
1021 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022 && *tl == *p && strncmp(p, tl, l) == 0
1023 && !((tc & TC_WORD) && isalnum_(p[l]))
1025 t_info = *ti;
1026 p += l;
1027 break;
1029 ti++;
1030 tl += l;
1033 if (!*tl) {
1034 /* it's a name (var/array/function),
1035 * otherwise it's something wrong
1037 if (!isalnum_(*p))
1038 syntax_error(EMSG_UNEXP_TOKEN);
1040 t_string = --p;
1041 while (isalnum_(*(++p))) {
1042 *(p-1) = *p;
1044 *(p-1) = '\0';
1045 tc = TC_VARIABLE;
1046 /* also consume whitespace between functionname and bracket */
1047 if (!(expected & TC_VARIABLE))
1048 skip_spaces(&p);
1049 if (*p == '(') {
1050 tc = TC_FUNCTION;
1051 } else {
1052 if (*p == '[') {
1053 p++;
1054 tc = TC_ARRAY;
1059 g_pos = p;
1061 /* skipping newlines in some cases */
1062 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063 goto readnext;
1065 /* insert concatenation operator when needed */
1066 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067 concat_inserted = TRUE;
1068 save_tclass = tc;
1069 save_info = t_info;
1070 tc = TC_BINOP;
1071 t_info = OC_CONCAT | SS | P(35);
1074 t_tclass = tc;
1076 ltclass = t_tclass;
1078 /* Are we ready for this? */
1079 if (!(ltclass & expected))
1080 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1083 return ltclass;
1084 #undef concat_inserted
1085 #undef save_tclass
1086 #undef save_info
1087 #undef ltclass
1090 static void rollback_token(void)
1092 t_rollback = TRUE;
1095 static node *new_node(uint32_t info)
1097 node *n;
1099 n = xzalloc(sizeof(node));
1100 n->info = info;
1101 n->lineno = g_lineno;
1102 return n;
1105 static node *mk_re_node(const char *s, node *n, regex_t *re)
1107 n->info = OC_REGEXP;
1108 n->l.re = re;
1109 n->r.ire = re + 1;
1110 xregcomp(re, s, REG_EXTENDED);
1111 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1113 return n;
1116 static node *condition(void)
1118 next_token(TC_SEQSTART);
1119 return parse_expr(TC_SEQTERM);
1122 /* parse expression terminated by given argument, return ptr
1123 * to built subtree. Terminator is eaten by parse_expr */
1124 static node *parse_expr(uint32_t iexp)
1126 node sn;
1127 node *cn = &sn;
1128 node *vn, *glptr;
1129 uint32_t tc, xtc;
1130 var *v;
1132 sn.info = PRIMASK;
1133 sn.r.n = glptr = NULL;
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1136 while (!((tc = next_token(xtc)) & iexp)) {
1137 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138 /* input redirection (<) attached to glptr node */
1139 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1140 cn->a.n = glptr;
1141 xtc = TC_OPERAND | TC_UOPPRE;
1142 glptr = NULL;
1144 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145 /* for binary and postfix-unary operators, jump back over
1146 * previous operators with higher priority */
1147 vn = cn;
1148 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1150 vn = vn->a.n;
1151 if ((t_info & OPCLSMASK) == OC_TERNARY)
1152 t_info += P(6);
1153 cn = vn->a.n->r.n = new_node(t_info);
1154 cn->a.n = vn->a.n;
1155 if (tc & TC_BINOP) {
1156 cn->l.n = vn;
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1159 /* it's a pipe */
1160 next_token(TC_GETLINE);
1161 /* give maximum priority to this pipe */
1162 cn->info &= ~PRIMASK;
1163 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1165 } else {
1166 cn->r.n = vn;
1167 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1169 vn->a.n = cn;
1171 } else {
1172 /* for operands and prefix-unary operators, attach them
1173 * to last node */
1174 vn = cn;
1175 cn = vn->r.n = new_node(t_info);
1176 cn->a.n = vn;
1177 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178 if (tc & (TC_OPERAND | TC_REGEXP)) {
1179 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180 /* one should be very careful with switch on tclass -
1181 * only simple tclasses should be used! */
1182 switch (tc) {
1183 case TC_VARIABLE:
1184 case TC_ARRAY:
1185 cn->info = OC_VAR;
1186 v = hash_search(ahash, t_string);
1187 if (v != NULL) {
1188 cn->info = OC_FNARG;
1189 cn->l.i = v->x.aidx;
1190 } else {
1191 cn->l.v = newvar(t_string);
1193 if (tc & TC_ARRAY) {
1194 cn->info |= xS;
1195 cn->r.n = parse_expr(TC_ARRTERM);
1197 break;
1199 case TC_NUMBER:
1200 case TC_STRING:
1201 cn->info = OC_VAR;
1202 v = cn->l.v = xzalloc(sizeof(var));
1203 if (tc & TC_NUMBER)
1204 setvar_i(v, t_double);
1205 else
1206 setvar_s(v, t_string);
1207 break;
1209 case TC_REGEXP:
1210 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1211 break;
1213 case TC_FUNCTION:
1214 cn->info = OC_FUNC;
1215 cn->r.f = newfunc(t_string);
1216 cn->l.n = condition();
1217 break;
1219 case TC_SEQSTART:
1220 cn = vn->r.n = parse_expr(TC_SEQTERM);
1221 cn->a.n = vn;
1222 break;
1224 case TC_GETLINE:
1225 glptr = cn;
1226 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227 break;
1229 case TC_BUILTIN:
1230 cn->l.n = condition();
1231 break;
1236 return sn.r.n;
1239 /* add node to chain. Return ptr to alloc'd node */
1240 static node *chain_node(uint32_t info)
1242 node *n;
1244 if (!seq->first)
1245 seq->first = seq->last = new_node(0);
1247 if (seq->programname != g_progname) {
1248 seq->programname = g_progname;
1249 n = chain_node(OC_NEWSOURCE);
1250 n->l.s = xstrdup(g_progname);
1253 n = seq->last;
1254 n->info = info;
1255 seq->last = n->a.n = new_node(OC_DONE);
1257 return n;
1260 static void chain_expr(uint32_t info)
1262 node *n;
1264 n = chain_node(info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t_tclass & TC_GRPTERM)
1267 rollback_token();
1270 static node *chain_loop(node *nn)
1272 node *n, *n2, *save_brk, *save_cont;
1274 save_brk = break_ptr;
1275 save_cont = continue_ptr;
1277 n = chain_node(OC_BR | Vx);
1278 continue_ptr = new_node(OC_EXEC);
1279 break_ptr = new_node(OC_EXEC);
1280 chain_group();
1281 n2 = chain_node(OC_EXEC | Vx);
1282 n2->l.n = nn;
1283 n2->a.n = n;
1284 continue_ptr->a.n = n2;
1285 break_ptr->a.n = n->r.n = seq->last;
1287 continue_ptr = save_cont;
1288 break_ptr = save_brk;
1290 return n;
/*
 * Parse one statement or one brace-delimited group from the token stream
 * and append the resulting nodes to the chain selected by `seq`, using
 * chain_node(), chain_expr() and chain_loop().  Calls itself for nested
 * groups and for the bodies of if/else.
 */
1293 /* parse group and attach it to chain */
1294 static void chain_group(void)
1296 uint32_t c;
1297 node *n, *n2, *n3;
/* skip newline tokens in front of the statement */
1299 do {
1300 c = next_token(TC_GRPSEQ);
1301 } while (c & TC_NEWLINE);
1303 if (c & TC_GRPSTART) {
/* brace group: parse statements until the group terminator; newline
 * tokens are ignored, anything else is pushed back and parsed by a
 * recursive call */
1304 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305 if (t_tclass & TC_NEWLINE) continue;
1306 rollback_token();
1307 chain_group();
1309 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
/* expression statement: push the token back and chain the expression */
1310 rollback_token();
1311 chain_expr(OC_EXEC | Vx);
1312 } else { /* TC_STATEMNT */
1313 switch (t_info & OPCLSMASK) {
1314 case ST_IF:
/* n: branch on the condition.  n2: OC_EXEC node chained after the
 * "then" body; n->r.n is the node following it, and when an else body
 * exists n2->a.n is redirected to the node after the else body */
1315 n = chain_node(OC_BR | Vx);
1316 n->l.n = condition();
1317 chain_group();
1318 n2 = chain_node(OC_EXEC);
1319 n->r.n = seq->last;
1320 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321 chain_group();
1322 n2->a.n = seq->last;
1323 } else {
1324 rollback_token();
1326 break;
1328 case ST_WHILE:
/* the condition is parsed before the loop is chained, then attached
 * to the branch node returned by chain_loop() */
1329 n2 = condition();
1330 n = chain_loop(NULL);
1331 n->l.n = n2;
1332 break;
1334 case ST_DO:
/* n2 is chained in front of the loop and its a.n is pointed at the
 * branch node's a.n, i.e. it bypasses the branch node on first entry;
 * the condition follows the body and is attached afterwards */
1335 n2 = chain_node(OC_EXEC);
1336 n = chain_loop(NULL);
1337 n2->a.n = n->a.n;
1338 next_token(TC_WHILE);
1339 n->l.n = condition();
1340 break;
1342 case ST_FOR:
1343 next_token(TC_SEQSTART);
1344 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345 if (t_tclass & TC_SEQTERM) { /* for-in */
/* the parenthesised expression must be an OC_IN node; its operands
 * feed an OC_WALKINIT node, and the loop's branch node is turned into
 * OC_WALKNEXT on the same left operand */
1346 if ((n2->info & OPCLSMASK) != OC_IN)
1347 syntax_error(EMSG_UNEXP_TOKEN);
1348 n = chain_node(OC_WALKINIT | VV);
1349 n->l.n = n2->l.n;
1350 n->r.n = n2->r.n;
1351 n = chain_loop(NULL);
1352 n->info = OC_WALKNEXT | Vx;
1353 n->l.n = n2->l.n;
1354 } else { /* for (;;) */
/* first expression is chained as a plain statement; the second becomes
 * the loop condition and the third is handed to chain_loop().  When the
 * condition is absent (n2 == NULL) the branch node is replaced by
 * OC_EXEC */
1355 n = chain_node(OC_EXEC | Vx);
1356 n->l.n = n2;
1357 n2 = parse_expr(TC_SEMICOL);
1358 n3 = parse_expr(TC_SEQTERM);
1359 n = chain_loop(n3);
1360 n->l.n = n2;
1361 if (!n2)
1362 n->info = OC_EXEC;
1364 break;
1366 case OC_PRINT:
1367 case OC_PRINTF:
/* left operand: the argument list; when an output-redirection token
 * ended it, that token's t_info is OR-ed into the node and the
 * redirection target is parsed as the right operand */
1368 n = chain_node(t_info);
1369 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370 if (t_tclass & TC_OUTRDR) {
1371 n->info |= t_info;
1372 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1374 if (t_tclass & TC_GRPTERM)
1375 rollback_token();
1376 break;
1378 case OC_BREAK:
/* jump node whose successor is the current break_ptr */
1379 n = chain_node(OC_EXEC);
1380 n->a.n = break_ptr;
1381 break;
1383 case OC_CONTINUE:
/* jump node whose successor is the current continue_ptr */
1384 n = chain_node(OC_EXEC);
1385 n->a.n = continue_ptr;
1386 break;
1388 /* delete, next, nextfile, return, exit */
1389 default:
1390 chain_expr(t_info);
/*
 * Parse the awk program text starting at `p` until TC_EOF.
 * Each top-level item is appended to a chain selected through `seq`:
 * beginseq for BEGIN, endseq for END, the function's own body chain for
 * a function declaration, and mainseq for everything else.
 */
1395 static void parse_program(char *p)
1397 uint32_t tclass;
1398 node *cn;
1399 func *f;
1400 var *v;
1402 g_pos = p;
1403 t_lineno = 1;
1404 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
/* operator terminators between items are skipped */
1407 if (tclass & TC_OPTERM)
1408 continue;
1410 seq = &mainseq;
1411 if (tclass & TC_BEGIN) {
1412 seq = &beginseq;
1413 chain_group();
1415 } else if (tclass & TC_END) {
1416 seq = &endseq;
1417 chain_group();
1419 } else if (tclass & TC_FUNCDECL) {
1420 next_token(TC_FUNCTION);
/* NOTE(review): presumably steps over the '(' that follows the
 * function name - confirm against the tokenizer */
1421 g_pos++;
1422 f = newfunc(t_string);
1423 f->body.first = NULL;
1424 f->nargs = 0;
/* each parameter name is entered into ahash with its position as
 * x.aidx; the list ends at the sequence terminator */
1425 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426 v = findvar(ahash, t_string);
1427 v->x.aidx = (f->nargs)++;
1429 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1430 break;
/* parse the body into the function's chain, then drop the parameter
 * names again */
1432 seq = &(f->body);
1433 chain_group();
1434 clear_array(ahash);
1436 } else if (tclass & TC_OPSEQ) {
/* pattern [action]: an OC_TEST node holds the pattern; with no action
 * group an OC_PRINT node is chained instead.  cn->r.n is set to the
 * node following the action */
1437 rollback_token();
1438 cn = chain_node(OC_TEST);
1439 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440 if (t_tclass & TC_GRPSTART) {
1441 rollback_token();
1442 chain_group();
1443 } else {
1444 chain_node(OC_PRINT);
1446 cn->r.n = mainseq.last;
1448 } else /* if (tclass & TC_GRPSTART) */ {
/* action without a pattern */
1449 rollback_token();
1450 chain_group();
1456 /* -------- program execution part -------- */
1458 static node *mk_splitter(const char *s, tsplitter *spl)
1460 regex_t *re, *ire;
1461 node *n;
1463 re = &spl->re[0];
1464 ire = &spl->re[1];
1465 n = &spl->n;
1466 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467 regfree(re);
1468 regfree(ire); // TODO: nuke ire, use re+1?
1470 if (strlen(s) > 1) {
1471 mk_re_node(s, n, re);
1472 } else {
1473 n->info = (uint32_t) *s;
1476 return n;
1479 /* use node as a regular expression. Supplied with node ptr and regex_t
1480 * storage space. Return ptr to regex (if result points to preg, it should
1481 * be later regfree'd manually
1483 static regex_t *as_regex(node *op, regex_t *preg)
1485 int cflags;
1486 var *v;
1487 const char *s;
1489 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1490 return icase ? op->r.ire : op->l.re;
1492 v = nvalloc(1);
1493 s = getvar_s(evaluate(op, v));
1495 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1496 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1497 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1498 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1499 * (maybe gsub is not supposed to use REG_EXTENDED?).
1501 if (regcomp(preg, s, cflags)) {
1502 cflags &= ~REG_EXTENDED;
1503 xregcomp(preg, s, cflags);
1505 nvfree(v);
1506 return preg;
/*
 * Gradually growing buffer: make sure *b can be indexed up to `n`.
 * When *b is NULL or `n` has reached *size, the buffer is reallocated
 * to n + n/2 + 80 bytes and *size is updated; otherwise nothing changes.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1518 /* resize field storage space */
1519 static void fsrealloc(int size)
1521 int i;
1523 if (size >= maxfields) {
1524 i = maxfields;
1525 maxfields = size + 16;
1526 Fields = xrealloc(Fields, maxfields * sizeof(var));
1527 for (; i < maxfields; i++) {
1528 Fields[i].type = VF_SPECIAL;
1529 Fields[i].string = NULL;
1533 if (size < nfields) {
1534 for (i = size; i < nfields; i++) {
1535 clrvar(Fields + i);
1538 nfields = size;
/*
 * Split string `s` into fields according to splitter node `spl`.
 * A buffer of strlen(s)*2+3 bytes is allocated and stored in *slist; the
 * fields are written into it one after another, each NUL-terminated.
 * Returns the number of fields.
 *
 * Four modes, chosen from spl->info:
 *   - regexp node:        split on regexp matches
 *   - character '\0':     every character becomes a field
 *   - any other character except ' ': split on that character
 *   - ' ':                split on runs of whitespace
 */
1541 static int awk_split(const char *s, node *spl, char **slist)
1543 int l, n = 0;
1544 char c[4];
1545 char *s1;
1546 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1548 /* in worst case, each char would be a separate field */
1549 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1550 strcpy(s1, s);
/* c[0..1]: the separator character (two slots so that upper and lower
 * case can be stored when icase is set); c[2]: '\n' when RS is the
 * empty string, otherwise NUL; c[3]: terminator */
1552 c[0] = c[1] = (char)spl->info;
1553 c[2] = c[3] = '\0';
1554 if (*getvar_s(intvar[RS]) == '\0')
1555 c[2] = '\n';
1557 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1558 if (!*s)
1559 return n; /* "": zero fields */
1560 n++; /* at least one field will be there */
1561 do {
1562 l = strcspn(s, c+2); /* len till next NUL or \n */
/* a match only counts when it starts no later than that point */
1563 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1564 && pmatch[0].rm_so <= l
1566 l = pmatch[0].rm_so;
/* empty match at the very start: consume one character anyway */
1567 if (pmatch[0].rm_eo == 0) {
1568 l++;
1569 pmatch[0].rm_eo++;
1571 n++; /* we saw yet another delimiter */
1572 } else {
/* no usable match: the field runs up to l; skip one more character
 * when the string does not end there */
1573 pmatch[0].rm_eo = l;
1574 if (s[l]) pmatch[0].rm_eo++;
1576 memcpy(s1, s, l);
1577 /* make sure we remove *all* of the separator chars */
1578 while (l < pmatch[0].rm_eo) {
1579 s1[l++] = '\0';
1581 nextword(&s1);
1582 s += pmatch[0].rm_eo;
1583 } while (*s);
1584 return n;
1586 if (c[0] == '\0') { /* null split */
1587 while (*s) {
1588 *s1++ = *s++;
1589 *s1++ = '\0';
1590 n++;
1592 return n;
1594 if (c[0] != ' ') { /* single-character split */
/* NOTE(review): toupper()/tolower() receive a plain char here; standard
 * <ctype.h> functions require an unsigned char value or EOF - confirm
 * whether the project overrides them */
1595 if (icase) {
1596 c[0] = toupper(c[0]);
1597 c[1] = tolower(c[1]);
/* the copy made above is cut in place: every byte found in c is
 * replaced by NUL (this includes '\n' when c[2] was set) */
1599 if (*s1) n++;
1600 while ((s1 = strpbrk(s1, c))) {
1601 *s1++ = '\0';
1602 n++;
1604 return n;
1606 /* space split */
/* NOTE(review): isspace() also receives a plain char - same caveat as
 * above */
1607 while (*s) {
1608 s = skip_whitespace(s);
1609 if (!*s) break;
1610 n++;
1611 while (*s && !isspace(*s))
1612 *s1++ = *s++;
1613 *s1++ = '\0';
1615 return n;
1618 static void split_f0(void)
1620 /* static char *fstrings; */
1621 #define fstrings (G.split_f0__fstrings)
1623 int i, n;
1624 char *s;
1626 if (is_f0_split)
1627 return;
1629 is_f0_split = TRUE;
1630 free(fstrings);
1631 fsrealloc(0);
1632 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1633 fsrealloc(n);
1634 s = fstrings;
1635 for (i = 0; i < n; i++) {
1636 Fields[i].string = nextword(&s);
1637 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1640 /* set NF manually to avoid side effects */
1641 clrvar(intvar[NF]);
1642 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1643 intvar[NF]->number = nfields;
1644 #undef fstrings
/*
 * React to a change of variable `v`.  Does nothing unless `v` carries
 * VF_SPECIAL.  Handled variables: NF, $0, FS, RS, IGNORECASE; any other
 * special variable is treated as a field ($n).
 */
1647 /* perform additional actions when some internal variables changed */
1648 static void handle_special(var *v)
1650 int n;
1651 char *b;
1652 const char *sep, *s;
1653 int sl, l, len, i, bsize;
1655 if (!(v->type & VF_SPECIAL))
1656 return;
1658 if (v == intvar[NF]) {
/* NF changed: resize the field array to the new count ... */
1659 n = (int)getvar_i(v);
1660 fsrealloc(n);
1662 /* recalculate $0 */
/* ... and rebuild $0 by joining the n fields with OFS.  b stays NULL
 * when n is 0.  bsize is first written by qrealloc(), which checks
 * for a NULL buffer before it reads the size */
1663 sep = getvar_s(intvar[OFS]);
1664 sl = strlen(sep);
1665 b = NULL;
1666 len = 0;
1667 for (i = 0; i < n; i++) {
1668 s = getvar_s(&Fields[i]);
1669 l = strlen(s);
/* from the second field on, put the separator first; room for it was
 * included in the previous qrealloc() request */
1670 if (b) {
1671 memcpy(b+len, sep, sl);
1672 len += sl;
1674 qrealloc(&b, len+l+sl, &bsize);
1675 memcpy(b+len, s, l);
1676 len += l;
1678 if (b)
1679 b[len] = '\0';
1680 setvar_p(intvar[F0], b);
/* the fields already match the new $0, so no re-split is needed */
1681 is_f0_split = TRUE;
1683 } else if (v == intvar[F0]) {
/* $0 changed: fields are stale until split_f0() runs again */
1684 is_f0_split = FALSE;
1686 } else if (v == intvar[FS]) {
1687 mk_splitter(getvar_s(v), &fsplitter);
1689 } else if (v == intvar[RS]) {
1690 mk_splitter(getvar_s(v), &rsplitter);
1692 } else if (v == intvar[IGNORECASE]) {
1693 icase = istrue(v);
1695 } else { /* $n */
/* a field changed: NF becomes at least this field's 1-based index;
 * setting NF goes through setvar_i() */
1696 n = getvar_i(intvar[NF]);
1697 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1698 /* right here v is invalid. Just to note... */
1702 /* step through func/builtin/etc arguments */
1703 static node *nextarg(node **pn)
1705 node *n;
1707 n = *pn;
1708 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1709 *pn = n->r.n;
1710 n = n->l.n;
1711 } else {
1712 *pn = NULL;
1714 return n;
1717 static void hashwalk_init(var *v, xhash *array)
1719 char **w;
1720 hash_item *hi;
1721 unsigned i;
1723 if (v->type & VF_WALK)
1724 free(v->x.walker);
1726 v->type |= VF_WALK;
1727 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1728 w[0] = w[1] = (char *)(w + 2);
1729 for (i = 0; i < array->csize; i++) {
1730 hi = array->items[i];
1731 while (hi) {
1732 strcpy(*w, hi->name);
1733 nextword(w);
1734 hi = hi->next;
1739 static int hashwalk_next(var *v)
1741 char **w;
1743 w = v->x.walker;
1744 if (w[1] == w[0])
1745 return FALSE;
1747 setvar_s(v, nextword(w+1));
1748 return TRUE;
1751 /* evaluate node, return 1 when result is true, 0 otherwise */
1752 static int ptest(node *pattern)
1754 /* ptest__v is "static": to save stack space? */
1755 return istrue(evaluate(pattern, &G.ptest__v));
/*
 * Read the next record from stream `rsm` into variable `v`.
 *
 * The stream's buffer, its size, the offset of unconsumed data (adv)
 * and the amount of unconsumed data (pos) are loaded from `rsm` and
 * stored back before returning.  The record terminator is taken from
 * rsplitter: a regexp, a single character, or (character '\0')
 * runs of newlines.  RT is set to the terminator text that was found.
 *
 * Returns 1 when a record was stored, 0 when no data was left, and -1
 * when reading failed (ERRNO is set in that case).
 */
1758 /* read next record from stream rsm into a variable v */
1759 static int awk_getline(rstream *rsm, var *v)
1761 char *b;
1762 regmatch_t pmatch[2];
1763 int a, p, pp=0, size;
1764 int fd, so, eo, r, rp;
1765 char c, *m, *s;
1767 /* we're using our own buffer since we need access to accumulating
1768 * characters */
1770 fd = fileno(rsm->F);
1771 m = rsm->buffer;
1772 a = rsm->adv;
1773 p = rsm->pos;
1774 size = rsm->size;
1775 c = (char) rsplitter.n.info;
1776 rp = 0;
1778 if (!m) qrealloc(&m, 256, &size);
/* each pass: look for a terminator in the p bytes at b; if none is
 * found, read more data and try again until a read adds nothing */
1779 do {
1780 b = m + a;
1781 so = eo = p;
1782 r = 1;
1783 if (p > 0) {
1784 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1785 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1786 b, 1, pmatch, 0) == 0) {
1787 so = pmatch[0].rm_so;
1788 eo = pmatch[0].rm_eo;
/* a match reaching the end of the buffered data is not accepted yet */
1789 if (b[eo] != '\0')
1790 break;
1792 } else if (c != '\0') {
/* single-character terminator: search only the newly read part;
 * an embedded NUL byte also ends the record */
1793 s = strchr(b+pp, c);
1794 if (!s) s = memchr(b+pp, '\0', p - pp);
1795 if (s) {
1796 so = eo = s-b;
1797 eo++;
1798 break;
1800 } else {
/* empty RS: skip leading newlines (rp), the record ends at the next
 * blank line and the whole run of newlines is the terminator */
1801 while (b[rp] == '\n')
1802 rp++;
1803 s = strstr(b+rp, "\n\n");
1804 if (s) {
1805 so = eo = s-b;
1806 while (b[eo] == '\n') eo++;
1807 if (b[eo] != '\0')
1808 break;
/* no terminator yet: move the unconsumed data (with its trailing NUL)
 * to the start of the buffer before reading more */
1813 if (a > 0) {
1814 memmove(m, (const void *)(m+a), p+1);
1815 b = m;
1816 a = 0;
1819 qrealloc(&m, a+p+128, &size);
1820 b = m + a;
1821 pp = p;
1822 p += safe_read(fd, b+p, size-p-1);
/* p shrank, i.e. safe_read() returned a negative value: report the
 * error and discard the buffered data */
1823 if (p < pp) {
1824 p = 0;
1825 r = 0;
1826 setvar_i(intvar[ERRNO], errno);
1828 b[p] = '\0';
1830 } while (p > pp);
1832 if (p == 0) {
/* nothing to return: r becomes 0 (no data) or -1 (read error) */
1833 r--;
1834 } else {
/* temporarily NUL-terminate the record and then the terminator so
 * that each can be copied out as a string */
1835 c = b[so]; b[so] = '\0';
1836 setvar_s(v, b+rp);
1837 v->type |= VF_USER;
1838 b[so] = c;
1839 c = b[eo]; b[eo] = '\0';
1840 setvar_s(intvar[RT], b+so);
1841 b[eo] = c;
1844 rsm->buffer = m;
1845 rsm->adv = a + eo;
1846 rsm->pos = p - eo;
1847 rsm->size = size;
1849 return r;
1852 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1854 int r = 0;
1855 char c;
1856 const char *s = format;
1858 if (int_as_int && n == (int)n) {
1859 r = snprintf(b, size, "%d", (int)n);
1860 } else {
1861 do { c = *s; } while (c && *++s);
1862 if (strchr("diouxX", c)) {
1863 r = snprintf(b, size, format, (int)n);
1864 } else if (strchr("eEfgG", c)) {
1865 r = snprintf(b, size, format, n);
1866 } else {
1867 syntax_error(EMSG_INV_FMT);
1870 return r;
/*
 * Build the result of a printf/sprintf call.
 * `n` is the argument list: its first argument is the format string,
 * the remaining ones are consumed one per conversion via nextarg().
 * The format is processed in segments, each ending right after one
 * conversion specification, and every segment is printed on its own.
 * Returns a newly allocated, NUL-terminated string.
 */
1873 /* formatted output into an allocated buffer, return ptr to buffer */
1874 static char *awk_printf(node *n)
1876 char *b = NULL;
1877 char *fmt, *s, *f;
1878 const char *s1;
1879 int i, j, incr, bsize;
1880 char c, c1;
1881 var *v, *arg;
1883 v = nvalloc(1);
/* work on a private copy of the format: it is cut into pieces below */
1884 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1886 i = 0;
1887 while (*f) {
1888 s = f;
/* advance f to the first character after a single '%'; a "%%" pair is
 * stepped over */
1889 while (*f && (*f != '%' || *(++f) == '%'))
1890 f++;
/* skip flags/width/precision up to the conversion letter */
1891 while (*f && !isalpha(*f)) {
1892 if (*f == '*')
1893 syntax_error("%*x formats are not supported");
1894 f++;
/* reserve room for the segment text plus MAXVARFMT bytes of output */
1897 incr = (f - s) + MAXVARFMT;
1898 qrealloc(&b, incr + i, &bsize);
/* c: conversion letter (NUL at end of format).  The character after
 * it is saved in c1 and overwritten with NUL so that s is a
 * self-contained format for this segment */
1899 c = *f;
1900 if (c != '\0') f++;
1901 c1 = *f;
1902 *f = '\0';
1903 arg = evaluate(nextarg(&n), v);
1905 j = i;
/* NOTE(review): the sprintf() calls below are bounded only by the
 * reservation above (plus strlen(s1) for %s); a field width in the
 * format larger than that does not appear to be accounted for -
 * confirm */
1906 if (c == 'c' || !c) {
1907 i += sprintf(b+i, s, is_numeric(arg) ?
1908 (char)getvar_i(arg) : *getvar_s(arg));
1909 } else if (c == 's') {
1910 s1 = getvar_s(arg);
1911 qrealloc(&b, incr+i+strlen(s1), &bsize);
1912 i += sprintf(b+i, s, s1);
1913 } else {
1914 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
/* restore the character that was overwritten */
1916 *f = c1;
1918 /* if there was an error while sprintf, return value is negative */
1919 if (i < j) i = j;
1922 b = xrealloc(b, i + 1);
1923 free(fmt);
1924 nvfree(v);
1925 b[i] = '\0';
1926 return b;
1929 /* common substitution routine
1930 * replace (nm) substring of (src) that match (n) with (repl), store
1931 * result into (dest), return number of substitutions. If nm=0, replace
1932 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1933 * subexpression matching (\1-\9)
 *
 * rn is the regexp node (or an expression evaluated to a pattern, see
 * as_regex()).  The returned value is the counter i, which is
 * incremented once per match found.
 */
1935 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1937 char *ds = NULL;
1938 const char *s;
1939 const char *sp;
1940 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1941 regmatch_t pmatch[10];
1942 regex_t sreg, *re;
1944 re = as_regex(rn, &sreg);
1945 if (!src) src = intvar[F0];
1946 if (!dest) dest = intvar[F0];
/* i: matches seen so far; di: length of the result built in ds;
 * sp: start of the not yet processed part of the source */
1948 i = di = 0;
1949 sp = getvar_s(src);
1950 rl = strlen(repl);
/* REG_NOTBOL is passed for every search that does not start at the
 * beginning of the source string */
1951 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1952 so = pmatch[0].rm_so;
1953 eo = pmatch[0].rm_eo;
/* copy everything up to the end of the match first ... */
1955 qrealloc(&ds, di + eo + rl, &dssize);
1956 memcpy(ds + di, sp, eo);
1957 di += eo;
1958 if (++i >= nm) {
1959 /* replace */
/* ... then, if this match is to be replaced, step back over the
 * matched text and write the replacement in its place */
1960 di -= (eo - so);
1961 nbs = 0;
/* nbs counts the backslashes immediately preceding the current
 * character of repl */
1962 for (s = repl; *s; s++) {
1963 ds[di++] = c = *s;
1964 if (c == '\\') {
1965 nbs++;
1966 continue;
1968 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* remove this character and about half of the preceding backslashes
 * from the output */
1969 di -= ((nbs + 3) >> 1);
1970 j = 0;
1971 if (c != '&') {
1972 j = c - '0';
1973 nbs++;
/* odd count: emit the character literally; even count: insert the
 * text of match/subexpression j */
1975 if (nbs % 2) {
1976 ds[di++] = c;
1977 } else {
1978 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1979 qrealloc(&ds, di + rl + n, &dssize);
1980 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1981 di += n;
1984 nbs = 0;
1988 sp += eo;
1989 if (i == nm) break;
/* empty match: copy one source character so the search advances;
 * stop at the end of the string */
1990 if (eo == so) {
1991 ds[di] = *sp++;
1992 if (!ds[di++]) break;
/* append the unprocessed rest of the source and hand the buffer over
 * to dest */
1996 qrealloc(&ds, di + strlen(sp), &dssize);
1997 strcpy(ds + di, sp);
1998 setvar_p(dest, ds);
/* free the regex only if as_regex() compiled it into sreg */
1999 if (re == &sreg) regfree(re);
2000 return i;
/*
 * Execute the builtin function described by node `op` and store the
 * result in `res`, which is also returned.
 *
 * Up to four arguments are taken from op->l.n with nextarg():
 *   an[i] - the argument's node
 *   av[i] - its evaluated value, when the flag word says so
 *   as[i] - its string value, when the flag word says so
 * The flag word is op->info: it is tested against 0x09000000 /
 * 0x08000000 and shifted right by one after each argument.  The top two
 * bits (info >> 30) give the minimum number of arguments.
 */
2003 static var *exec_builtin(node *op, var *res)
2005 #define tspl (G.exec_builtin__tspl)
2007 int (*to_xxx)(int);
2008 var *tv;
2009 node *an[4];
2010 var *av[4];
2011 const char *as[4];
2012 regmatch_t pmatch[2];
2013 regex_t sreg, *re;
2014 node *spl;
2015 uint32_t isr, info;
2016 int nargs;
2017 time_t tt;
2018 char *s, *s1;
2019 int i, l, ll, n;
/* four temporaries, one per possible argument */
2021 tv = nvalloc(4);
2022 isr = info = op->info;
2023 op = op->l.n;
2025 av[2] = av[3] = NULL;
2026 for (i = 0; i < 4 && op; i++) {
2027 an[i] = nextarg(&op);
2028 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2029 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2030 isr >>= 1;
2033 nargs = i;
2034 if ((uint32_t)nargs < (info >> 30))
2035 syntax_error(EMSG_TOO_FEW_ARGS);
2037 switch (info & OPNMASK) {
2039 case B_a2:
2040 #if ENABLE_FEATURE_AWK_LIBM
2041 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2042 #else
2043 syntax_error(EMSG_NO_MATH);
2044 #endif
2045 break;
2047 case B_sp:
/* splitter: third argument (used directly when it is a regexp node,
 * otherwise turned into a splitter from its string value), or the
 * field splitter when only two arguments were given */
2048 if (nargs > 2) {
2049 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2050 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2051 } else {
2052 spl = &fsplitter.n;
/* split as[0], replace the contents of array av[1] with the pieces
 * indexed from 1, and return the piece count */
2055 n = awk_split(as[0], spl, &s);
2056 s1 = s;
2057 clear_array(iamarray(av[1]));
2058 for (i = 1; i <= n; i++)
2059 setari_u(av[1], i, nextword(&s1));
2060 free(s);
2061 setvar_i(res, n);
2062 break;
2064 case B_ss:
/* start index is 1-based and clamped to [0, length]; a missing length
 * means "to the end", a negative one is treated as 0 */
2065 l = strlen(as[0]);
2066 i = getvar_i(av[1]) - 1;
2067 if (i > l) i = l;
2068 if (i < 0) i = 0;
2069 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2070 if (n < 0) n = 0;
2071 s = xstrndup(as[0]+i, n);
2072 setvar_p(res, s);
2073 break;
2075 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2076 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2077 case B_an:
2078 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2079 break;
2081 case B_co:
2082 setvar_i(res, ~getvar_i_int(av[0]));
2083 break;
2085 case B_ls:
2086 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2087 break;
2089 case B_or:
2090 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2091 break;
2093 case B_rs:
2094 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2095 break;
2097 case B_xo:
2098 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2099 break;
/* B_lo and B_up share the code at lo_cont and differ only in the
 * conversion function */
2101 case B_lo:
2102 to_xxx = tolower;
2103 goto lo_cont;
2105 case B_up:
2106 to_xxx = toupper;
2107 lo_cont:
/* NOTE(review): *s1 is a plain char; tolower()/toupper() expect an
 * unsigned char value or EOF - confirm handling of bytes >= 0x80 */
2108 s1 = s = xstrdup(as[0]);
2109 while (*s1) {
2110 *s1 = (*to_xxx)(*s1);
2111 s1++;
2113 setvar_p(res, s);
2114 break;
2116 case B_ix:
/* result is the 1-based position of as[1] within as[0], or 0 when it
 * is not found, is empty, or is longer than as[0] */
2117 n = 0;
2118 ll = strlen(as[1]);
2119 l = strlen(as[0]) - ll;
2120 if (ll > 0 && l >= 0) {
2121 if (!icase) {
2122 s = strstr(as[0], as[1]);
2123 if (s) n = (s - as[0]) + 1;
2124 } else {
2125 /* this piece of code is terribly slow and
2126 * really should be rewritten */
2128 for (i=0; i<=l; i++) {
2129 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2130 n = i+1;
2131 break;
2136 setvar_i(res, n);
2137 break;
2139 case B_ti:
/* time to format: second argument if present, otherwise now */
2140 if (nargs > 1)
2141 tt = getvar_i(av[1]);
2142 else
2143 time(&tt);
2144 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
/* NOTE(review): output is limited to MAXVARFMT bytes of g_buf; when
 * the result does not fit, strftime() returns 0 and the result becomes
 * an empty string */
2145 i = strftime(g_buf, MAXVARFMT,
2146 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2147 localtime(&tt));
2148 g_buf[i] = '\0';
2149 setvar_s(res, g_buf);
2150 break;
2152 case B_ma:
/* on a match the offsets are made 1-based; on failure rm_so = 0 and
 * rm_eo = -1, so that RSTART becomes 0 and RLENGTH becomes -1 */
2153 re = as_regex(an[1], &sreg);
2154 n = regexec(re, as[0], 1, pmatch, 0);
2155 if (n == 0) {
2156 pmatch[0].rm_so++;
2157 pmatch[0].rm_eo++;
2158 } else {
2159 pmatch[0].rm_so = 0;
2160 pmatch[0].rm_eo = -1;
2162 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2163 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2164 setvar_i(res, pmatch[0].rm_so);
2165 if (re == &sreg) regfree(re);
2166 break;
/* the three substitution builtins differ in the match-number argument
 * passed to awk_sub(), in where the result is stored, and in whether
 * subexpression references are enabled */
2168 case B_ge:
2169 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2170 break;
2172 case B_gs:
2173 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2174 break;
2176 case B_su:
2177 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2178 break;
2181 nvfree(tv);
2182 return res;
2183 #undef tspl
/*
2187 * Evaluate node - the heart of the program. Supplied with subtree
2188 * and place where to store result. returns ptr to result.
 *
 * The function serves two kinds of nodes with one loop:
 *   - statement-like nodes are walked iteratively: after such a node is
 *     handled, `op` moves on (normally to op->a.n) and the loop
 *     continues;
 *   - expression-like nodes (opcode class >= RECUR_FROM_THIS) compute a
 *     value, recursing into their operands, and end the loop.
 * The loop also ends as soon as `nextrec` is set.
 *
 * Before the opcode switch, the operands are prepared according to the
 * OF_RES1/OF_RES2/OF_STR1/OF_STR2/OF_NUM1 flags of the node: L/R hold
 * the evaluated left/right operand as var, string or number.  The two
 * temporaries allocated in v1 receive those operand values.
 *
 * A NULL `op` yields `res` set to the NULL string.
 */
2190 #define XC(n) ((n) >> 8)
2192 static var *evaluate(node *op, var *res)
2194 /* This procedure is recursive so we should count every byte */
2195 #define fnargs (G.evaluate__fnargs)
2196 /* seed is initialized to 1 */
2197 #define seed (G.evaluate__seed)
2198 #define sreg (G.evaluate__sreg)
2200 node *op1;
2201 var *v1;
2202 union {
2203 var *v;
2204 const char *s;
2205 double d;
2206 int i;
2207 } L, R;
2208 uint32_t opinfo;
2209 int opn;
2210 union {
2211 char *s;
2212 rstream *rsm;
2213 FILE *F;
2214 var *v;
2215 regex_t *re;
2216 uint32_t info;
2217 } X;
2219 if (!op)
2220 return setvar_s(res, NULL);
2222 v1 = nvalloc(2);
2224 while (op) {
2225 opinfo = op->info;
2226 opn = (opinfo & OPNMASK);
2227 g_lineno = op->lineno;
2229 /* execute inevitable things */
2230 op1 = op->l.n;
2231 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2232 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2233 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2234 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2235 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2237 switch (XC(opinfo & OPCLSMASK)) {
2239 /* -- iterative node type -- */
2241 /* test pattern */
2242 case XC( OC_TEST ):
2243 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2244 /* it's range pattern */
/* OF_CHECKED in the node itself remembers that the range is open:
 * it is set once the start pattern matched and cleared again when the
 * end pattern matches */
2245 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2246 op->info |= OF_CHECKED;
2247 if (ptest(op1->r.n))
2248 op->info &= ~OF_CHECKED;
2250 op = op->a.n;
2251 } else {
2252 op = op->r.n;
2254 } else {
2255 op = (ptest(op1)) ? op->a.n : op->r.n;
2257 break;
2259 /* just evaluate an expression, also used as unconditional jump */
2260 case XC( OC_EXEC ):
2261 break;
2263 /* branch, used in if-else and various loops */
2264 case XC( OC_BR ):
2265 op = istrue(L.v) ? op->a.n : op->r.n;
2266 break;
2268 /* initialize for-in loop */
2269 case XC( OC_WALKINIT ):
2270 hashwalk_init(L.v, iamarray(R.v));
2271 break;
2273 /* get next array item */
2274 case XC( OC_WALKNEXT ):
2275 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2276 break;
2278 case XC( OC_PRINT ):
2279 case XC( OC_PRINTF ):
/* output goes to stdout unless a redirection target (right operand)
 * is present; the target stream is looked up by name and opened on
 * first use: as a pipe for '|', otherwise as a file in write mode for
 * 'w' and append mode for anything else */
2280 X.F = stdout;
2281 if (op->r.n) {
2282 X.rsm = newfile(R.s);
2283 if (!X.rsm->F) {
2284 if (opn == '|') {
2285 X.rsm->F = popen(R.s, "w");
2286 if (X.rsm->F == NULL)
2287 bb_perror_msg_and_die("popen");
2288 X.rsm->is_pipe = 1;
2289 } else {
2290 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2293 X.F = X.rsm->F;
2296 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print: no arguments prints $0; otherwise each argument is printed,
 * numbers through OFMT, separated by OFS; ORS terminates the line */
2297 if (!op1) {
2298 fputs(getvar_s(intvar[F0]), X.F);
2299 } else {
2300 while (op1) {
2301 L.v = evaluate(nextarg(&op1), v1);
2302 if (L.v->type & VF_NUMBER) {
2303 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2304 getvar_i(L.v), TRUE);
2305 fputs(g_buf, X.F);
2306 } else {
2307 fputs(getvar_s(L.v), X.F);
2310 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2313 fputs(getvar_s(intvar[ORS]), X.F);
2315 } else { /* OC_PRINTF */
2316 L.s = awk_printf(op1);
2317 fputs(L.s, X.F);
2318 free((char*)L.s);
2320 fflush(X.F);
2321 break;
2323 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2324 X.info = op1->info & OPCLSMASK;
2325 if (X.info == OC_VAR) {
2326 R.v = op1->l.v;
2327 } else if (X.info == OC_FNARG) {
2328 R.v = &fnargs[op1->l.i];
2329 } else {
2330 syntax_error(EMSG_NOT_ARRAY);
/* with a subscript, remove that one element; otherwise empty the
 * whole array */
2333 if (op1->r.n) {
/* NOTE(review): L.v is only assigned above when OF_RES1 is set for
 * this opcode - confirm it is valid here */
2334 clrvar(L.v);
2335 L.s = getvar_s(evaluate(op1->r.n, v1));
2336 hash_remove(iamarray(R.v), L.s);
2337 } else {
2338 clear_array(iamarray(R.v));
2340 break;
2342 case XC( OC_NEWSOURCE ):
2343 g_progname = op->l.s;
2344 break;
2346 case XC( OC_RETURN ):
2347 copyvar(res, L.v);
2348 break;
2350 case XC( OC_NEXTFILE ):
2351 nextfile = TRUE;
/* fall through */
2352 case XC( OC_NEXT ):
2353 nextrec = TRUE;
/* fall through */
2354 case XC( OC_DONE ):
2355 clrvar(res);
2356 break;
2358 case XC( OC_EXIT ):
/* awk_exit() ends in exit(), so control does not continue below */
2359 awk_exit(L.d);
2361 /* -- recursive node type -- */
2363 case XC( OC_VAR ):
/* reading NF requires $0 to be split first */
2364 L.v = op->l.v;
2365 if (L.v == intvar[NF])
2366 split_f0();
2367 goto v_cont;
2369 case XC( OC_FNARG ):
2370 L.v = &fnargs[op->l.i];
2371 v_cont:
/* with a subscript (right operand) the result is the array element,
 * otherwise the variable itself */
2372 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2373 break;
2375 case XC( OC_IN ):
2376 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2377 break;
2379 case XC( OC_REGEXP ):
/* a bare regexp is matched against $0 */
2380 op1 = op;
2381 L.s = getvar_s(intvar[F0]);
2382 goto re_cont;
2384 case XC( OC_MATCH ):
2385 op1 = op->r.n;
2386 re_cont:
/* result is 1 on match, inverted when the operator is '!' */
2387 X.re = as_regex(op1, &sreg);
2388 R.i = regexec(X.re, L.s, 0, NULL, 0);
2389 if (X.re == &sreg) regfree(X.re);
2390 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2391 break;
2393 case XC( OC_MOVE ):
2394 /* if source is a temporary string, jusk relink it to dest */
2395 if (R.v == v1+1 && R.v->string) {
2396 res = setvar_p(L.v, R.v->string);
2397 R.v->string = NULL;
2398 } else {
2399 res = copyvar(L.v, R.v);
2401 break;
2403 case XC( OC_TERNARY ):
/* the right operand must be an OC_COLON node carrying both branches */
2404 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2405 syntax_error(EMSG_POSSIBLE_ERROR);
2406 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2407 break;
2409 case XC( OC_FUNC ):
2410 if (!op->r.f->body.first)
2411 syntax_error(EMSG_UNDEF_FUNC);
/* build the argument frame: each actual argument is copied into the
 * next slot, flagged VF_CHILD and linked to the value it was copied
 * from; arguments beyond the declared count are not evaluated */
2413 X.v = R.v = nvalloc(op->r.f->nargs+1);
2414 while (op1) {
2415 L.v = evaluate(nextarg(&op1), v1);
2416 copyvar(R.v, L.v);
2417 R.v->type |= VF_CHILD;
2418 R.v->x.parent = L.v;
2419 if (++R.v - X.v >= op->r.f->nargs)
2420 break;
/* switch to the new frame for the duration of the call; the caller's
 * frame and program name are saved in R.v / L.s and restored after */
2423 R.v = fnargs;
2424 fnargs = X.v;
2426 L.s = g_progname;
2427 res = evaluate(op->r.f->body.first, res);
2428 g_progname = L.s;
2430 nvfree(fnargs);
2431 fnargs = R.v;
2432 break;
2434 case XC( OC_GETLINE ):
2435 case XC( OC_PGETLINE ):
/* with a left operand the source is a named stream (command for
 * OC_PGETLINE, file otherwise), opened on first use; without one it is
 * the current main input file */
2436 if (op1) {
2437 X.rsm = newfile(L.s);
2438 if (!X.rsm->F) {
2439 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2440 X.rsm->F = popen(L.s, "r");
2441 X.rsm->is_pipe = TRUE;
2442 } else {
2443 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2446 } else {
2447 if (!iF) iF = next_input_file();
2448 X.rsm = iF;
/* the stream could not be opened: result -1, ERRNO set */
2451 if (!X.rsm->F) {
2452 setvar_i(intvar[ERRNO], errno);
2453 setvar_i(res, -1);
2454 break;
/* without a target variable the record goes to $0 */
2457 if (!op->r.n)
2458 R.v = intvar[F0];
2460 L.i = awk_getline(X.rsm, R.v);
/* only records read from the main input advance FNR and NR */
2461 if (L.i > 0) {
2462 if (!op1) {
2463 incvar(intvar[FNR]);
2464 incvar(intvar[NR]);
2467 setvar_i(res, L.i);
2468 break;
2470 /* simple builtins */
2471 case XC( OC_FBLTIN ):
/* each case leaves its numeric result in R.d */
2472 switch (opn) {
2474 case F_in:
2475 R.d = (int)L.d;
2476 break;
2478 case F_rn:
2479 R.d = (double)rand() / (double)RAND_MAX;
2480 break;
2481 #if ENABLE_FEATURE_AWK_LIBM
2482 case F_co:
2483 R.d = cos(L.d);
2484 break;
2486 case F_ex:
2487 R.d = exp(L.d);
2488 break;
2490 case F_lg:
2491 R.d = log(L.d);
2492 break;
2494 case F_si:
2495 R.d = sin(L.d);
2496 break;
2498 case F_sq:
2499 R.d = sqrt(L.d);
2500 break;
2501 #else
2502 case F_co:
2503 case F_ex:
2504 case F_lg:
2505 case F_si:
2506 case F_sq:
2507 syntax_error(EMSG_NO_MATH);
2508 break;
2509 #endif
2510 case F_sr:
/* returns the previous seed; the new one is the argument or, without
 * an argument, the current time */
2511 R.d = (double)seed;
2512 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2513 srand(seed);
2514 break;
2516 case F_ti:
2517 R.d = time(NULL);
2518 break;
2520 case F_le:
/* without an argument, the length of $0 */
2521 if (!op1)
2522 L.s = getvar_s(intvar[F0]);
2523 R.d = strlen(L.s);
2524 break;
2526 case F_sy:
/* flush all output first; the command runs only when
 * ENABLE_FEATURE_ALLOW_EXEC is set and the string is non-empty */
2527 fflush(NULL);
2528 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2529 ? (system(L.s) >> 8) : 0;
2530 break;
2532 case F_ff:
/* NOTE(review): R.d is not assigned in this case before it is used
 * for the result below - confirm */
2533 if (!op1)
2534 fflush(stdout);
2535 else {
2536 if (L.s && *L.s) {
2537 X.rsm = newfile(L.s);
2538 fflush(X.rsm->F);
2539 } else {
2540 fflush(NULL);
2543 break;
2545 case F_cl:
/* NOTE(review): when no stream with this name exists, R.i is read
 * below without having been assigned in this case - confirm */
2546 X.rsm = (rstream *)hash_search(fdhash, L.s);
2547 if (X.rsm) {
2548 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2549 free(X.rsm->buffer);
2550 hash_remove(fdhash, L.s);
2552 if (R.i != 0)
2553 setvar_i(intvar[ERRNO], errno);
2554 R.d = (double)R.i;
2555 break;
2557 setvar_i(res, R.d);
2558 break;
2560 case XC( OC_BUILTIN ):
2561 res = exec_builtin(op, res);
2562 break;
2564 case XC( OC_SPRINTF ):
2565 setvar_p(res, awk_printf(op1));
2566 break;
2568 case XC( OC_UNARY ):
/* L.d is the value of the expression, R.d the value written back to
 * the operand.  'P'/'M' update both (result is the new value);
 * 'p'/'m' update only R.d (result is the old value) */
2569 X.v = R.v;
2570 L.d = R.d = getvar_i(R.v);
2571 switch (opn) {
2572 case 'P':
2573 L.d = ++R.d;
2574 goto r_op_change;
2575 case 'p':
2576 R.d++;
2577 goto r_op_change;
2578 case 'M':
2579 L.d = --R.d;
2580 goto r_op_change;
2581 case 'm':
2582 R.d--;
2583 goto r_op_change;
2584 case '!':
2585 L.d = istrue(X.v) ? 0 : 1;
2586 break;
2587 case '-':
2588 L.d = -R.d;
2589 break;
2590 r_op_change:
2591 setvar_i(X.v, R.d);
2593 setvar_i(res, L.d);
2594 break;
2596 case XC( OC_FIELD ):
/* $0 is the record itself; $n (n > 0) is Fields[n-1], with the field
 * array extended when n is beyond the current field count */
2597 R.i = (int)getvar_i(R.v);
2598 if (R.i == 0) {
2599 res = intvar[F0];
2600 } else {
2601 split_f0();
2602 if (R.i > nfields)
2603 fsrealloc(R.i);
2604 res = &Fields[R.i - 1];
2606 break;
2608 /* concatenation (" ") and index joining (",") */
2609 case XC( OC_CONCAT ):
2610 case XC( OC_COMMA ):
/* opn is reused here as the size of the result buffer; for OC_COMMA
 * the value of SUBSEP is inserted between the two strings */
2611 opn = strlen(L.s) + strlen(R.s) + 2;
2612 X.s = xmalloc(opn);
2613 strcpy(X.s, L.s);
2614 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2615 L.s = getvar_s(intvar[SUBSEP]);
2616 X.s = xrealloc(X.s, opn + strlen(L.s));
2617 strcat(X.s, L.s);
2619 strcat(X.s, R.s);
2620 setvar_p(res, X.s);
2621 break;
/* the right operand of && and || is evaluated only when needed */
2623 case XC( OC_LAND ):
2624 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2625 break;
2627 case XC( OC_LOR ):
2628 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2629 break;
2631 case XC( OC_BINARY ):
2632 case XC( OC_REPLACE ):
2633 R.d = getvar_i(R.v);
2634 switch (opn) {
2635 case '+':
2636 L.d += R.d;
2637 break;
2638 case '-':
2639 L.d -= R.d;
2640 break;
2641 case '*':
2642 L.d *= R.d;
2643 break;
2644 case '/':
2645 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2646 L.d /= R.d;
2647 break;
/* '&' is the opcode used for exponentiation (pow) */
2648 case '&':
2649 #if ENABLE_FEATURE_AWK_LIBM
2650 L.d = pow(L.d, R.d);
2651 #else
2652 syntax_error(EMSG_NO_MATH);
2653 #endif
2654 break;
2655 case '%':
2656 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2657 L.d -= (int)(L.d / R.d) * R.d;
2658 break;
/* OC_BINARY stores into res; OC_REPLACE stores into the left operand
 * variable (X.v) */
2660 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2661 break;
2663 case XC( OC_COMPARE ):
/* numeric comparison when both sides are numeric, otherwise string
 * comparison (case-insensitive when icase is set); L.d ends up as the
 * sign-carrying difference */
2664 if (is_numeric(L.v) && is_numeric(R.v)) {
2665 L.d = getvar_i(L.v) - getvar_i(R.v);
2666 } else {
2667 L.s = getvar_s(L.v);
2668 R.s = getvar_s(R.v);
2669 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* opn with bit 0 masked off selects the relation (0: >, 2: >=,
 * 4: ==); bit 0 clear means the result is negated */
2671 switch (opn & 0xfe) {
2672 case 0:
2673 R.i = (L.d > 0);
2674 break;
2675 case 2:
2676 R.i = (L.d >= 0);
2677 break;
2678 case 4:
2679 R.i = (L.d == 0);
2680 break;
2682 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2683 break;
2685 default:
2686 syntax_error(EMSG_POSSIBLE_ERROR);
/* classes up to SHIFT_TIL_THIS advance to the next node in the chain;
 * classes from RECUR_FROM_THIS on are expressions and end the loop, as
 * does a pending "next" */
2688 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2689 op = op->a.n;
2690 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2691 break;
2692 if (nextrec)
2693 break;
2695 nvfree(v1);
2696 return res;
2697 #undef fnargs
2698 #undef seed
2699 #undef sreg
2703 /* -------- main & co. -------- */
2705 static int awk_exit(int r)
2707 var tv;
2708 unsigned i;
2709 hash_item *hi;
2711 zero_out_var(&tv);
2713 if (!exiting) {
2714 exiting = TRUE;
2715 nextrec = FALSE;
2716 evaluate(endseq.first, &tv);
2719 /* waiting for children */
2720 for (i = 0; i < fdhash->csize; i++) {
2721 hi = fdhash->items[i];
2722 while (hi) {
2723 if (hi->data.rs.F && hi->data.rs.is_pipe)
2724 pclose(hi->data.rs.F);
2725 hi = hi->next;
2729 exit(r);
2732 /* if expr looks like "var=value", perform assignment and return 1,
2733 * otherwise return 0 */
2734 static int is_assignment(const char *expr)
2736 char *exprc, *s, *s0, *s1;
2738 exprc = xstrdup(expr);
2739 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2740 free(exprc);
2741 return FALSE;
2744 *(s++) = '\0';
2745 s0 = s1 = s;
2746 while (*s)
2747 *(s1++) = nextchar(&s);
2749 *s1 = '\0';
2750 setvar_u(newvar(exprc), s0);
2751 free(exprc);
2752 return TRUE;
2755 /* switch to next input file */
2756 static rstream *next_input_file(void)
2758 #define rsm (G.next_input_file__rsm)
2759 #define files_happen (G.next_input_file__files_happen)
2761 FILE *F = NULL;
2762 const char *fname, *ind;
2764 if (rsm.F) fclose(rsm.F);
2765 rsm.F = NULL;
2766 rsm.pos = rsm.adv = 0;
2768 do {
2769 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2770 if (files_happen)
2771 return NULL;
2772 fname = "-";
2773 F = stdin;
2774 } else {
2775 ind = getvar_s(incvar(intvar[ARGIND]));
2776 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2777 if (fname && *fname && !is_assignment(fname))
2778 F = xfopen_stdin(fname);
2780 } while (!F);
2782 files_happen = TRUE;
2783 setvar_s(intvar[FILENAME], fname);
2784 rsm.F = F;
2785 return &rsm;
2786 #undef rsm
2787 #undef files_happen
2790 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2791 int awk_main(int argc, char **argv)
2793 unsigned opt;
2794 char *opt_F, *opt_W;
2795 llist_t *list_v = NULL;
2796 llist_t *list_f = NULL;
2797 int i, j;
2798 var *v;
2799 var tv;
2800 char **envp;
2801 char *vnames = (char *)vNames; /* cheat */
2802 char *vvalues = (char *)vValues;
2804 INIT_G();
2806 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2807 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2808 if (ENABLE_LOCALE_SUPPORT)
2809 setlocale(LC_NUMERIC, "C");
2811 zero_out_var(&tv);
2813 /* allocate global buffer */
2814 g_buf = xmalloc(MAXVARFMT + 1);
2816 vhash = hash_init();
2817 ahash = hash_init();
2818 fdhash = hash_init();
2819 fnhash = hash_init();
2821 /* initialize variables */
2822 for (i = 0; *vnames; i++) {
2823 intvar[i] = v = newvar(nextword(&vnames));
2824 if (*vvalues != '\377')
2825 setvar_s(v, nextword(&vvalues));
2826 else
2827 setvar_i(v, 0);
2829 if (*vnames == '*') {
2830 v->type |= VF_SPECIAL;
2831 vnames++;
2835 handle_special(intvar[FS]);
2836 handle_special(intvar[RS]);
2838 newfile("/dev/stdin")->F = stdin;
2839 newfile("/dev/stdout")->F = stdout;
2840 newfile("/dev/stderr")->F = stderr;
2842 /* Huh, people report that sometimes environ is NULL. Oh well. */
2843 if (environ) for (envp = environ; *envp; envp++) {
2844 /* environ is writable, thus we don't strdup it needlessly */
2845 char *s = *envp;
2846 char *s1 = strchr(s, '=');
2847 if (s1) {
2848 *s1 = '\0';
2849 /* Both findvar and setvar_u take const char*
2850 * as 2nd arg -> environment is not trashed */
2851 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2852 *s1 = '=';
2855 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2856 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2857 argv += optind;
2858 argc -= optind;
2859 if (opt & 0x1)
2860 setvar_s(intvar[FS], opt_F); // -F
2861 while (list_v) { /* -v */
2862 if (!is_assignment(llist_pop(&list_v)))
2863 bb_show_usage();
2865 if (list_f) { /* -f */
2866 do {
2867 char *s = NULL;
2868 FILE *from_file;
2870 g_progname = llist_pop(&list_f);
2871 from_file = xfopen_stdin(g_progname);
2872 /* one byte is reserved for some trick in next_token */
2873 for (i = j = 1; j > 0; i += j) {
2874 s = xrealloc(s, i + 4096);
2875 j = fread(s + i, 1, 4094, from_file);
2877 s[i] = '\0';
2878 fclose(from_file);
2879 parse_program(s + 1);
2880 free(s);
2881 } while (list_f);
2882 argc++;
2883 } else { // no -f: take program from 1st parameter
2884 if (!argc)
2885 bb_show_usage();
2886 g_progname = "cmd. line";
2887 parse_program(*argv++);
2889 if (opt & 0x8) // -W
2890 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2892 /* fill in ARGV array */
2893 setvar_i(intvar[ARGC], argc);
2894 setari_u(intvar[ARGV], 0, "awk");
2895 i = 0;
2896 while (*argv)
2897 setari_u(intvar[ARGV], ++i, *argv++);
2899 evaluate(beginseq.first, &tv);
2900 if (!mainseq.first && !endseq.first)
2901 awk_exit(EXIT_SUCCESS);
2903 /* input file could already be opened in BEGIN block */
2904 if (!iF) iF = next_input_file();
2906 /* passing through input files */
2907 while (iF) {
2908 nextfile = FALSE;
2909 setvar_i(intvar[FNR], 0);
2911 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2912 nextrec = FALSE;
2913 incvar(intvar[NR]);
2914 incvar(intvar[FNR]);
2915 evaluate(mainseq.first, &tv);
2917 if (nextfile)
2918 break;
2921 if (i < 0)
2922 syntax_error(strerror(errno));
2924 iF = next_input_file();
2927 awk_exit(EXIT_SUCCESS);
2928 /*return 0;*/