Changes for kernel and Busybox
[tomato.git] / release / src / router / busybox / editors / awk.c
blob71abca215a4bb3b3942f8cad35c8207cc3fee517
1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
21 /* This is a NOEXEC applet. Be very careful! */
/* Tracing hooks for the array walker, the evaluator and the parser. */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
28 #define debug_printf_parse(...) do {} while (0)
/* Fallbacks: a hook left undefined above forwards its arguments to fprintf(stderr, ...). */
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #endif
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38 #endif
/* MAXVARFMT: buffer size handed to fmt_num() when getvar_s() formats a number into g_buf. */
42 #define MAXVARFMT 240
/* MINNVBLOCK: smallest number of vars nvalloc() puts into a freshly allocated nvblock. */
43 #define MINNVBLOCK 64
/* Bits stored in var::type. The low byte describes the kind of value,
 * the higher bits are bookkeeping. */
45 /* variable flags */
46 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
47 #define VF_ARRAY 0x0002 /* 1 = it's an array */
49 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
/* clrvar() keeps exactly these bits (and then sets VF_DIRTY); copyvar() never copies them. */
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
/* One node of the list hung off var::x.walker (flag VF_WALK).
 * nvfree() releases the whole chain by following `prev`.
 * NOTE(review): end/cur/wbuf presumably delimit and iterate the element names
 * stored in wbuf for a for..in loop; the code that fills them is not in this view. */
60 typedef struct walker_list {
61 char *end;
62 char *cur;
63 struct walker_list *prev;
64 char wbuf[1];
65 } walker_list;
67 /* Variable */
/* type   - VF_* bits.
 * number - numeric value; valid when VF_NUMBER is set or after getvar_i() cached it.
 * string - string value or NULL; freed by clrvar() unless VF_FSTR is set.
 * x      - extra payload; which member is live depends on the VF_ARRAY /
 *          VF_CHILD / VF_WALK bits in type. */
68 typedef struct var_s {
69 unsigned type; /* flags */
70 double number;
71 char *string;
72 union {
73 int aidx; /* func arg idx (for compilation stage) */
74 struct xhash_s *array; /* array ptr */
75 struct var_s *parent; /* for func args, ptr to actual parameter */
76 walker_list *walker; /* list of array elements (for..in) */
77 } x;
78 } var;
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* first/last delimit the chain of nodes.
 * NOTE(review): programname presumably names the source the chain was parsed
 * from, for error messages — confirm where it is set. */
81 typedef struct chain_s {
82 struct node_s *first;
83 struct node_s *last;
84 const char *programname;
85 } chain;
87 /* Function */
/* Entry of the function hash (see newfunc()): argument count plus body chain. */
88 typedef struct func_s {
89 unsigned nargs;
90 struct chain_s body;
91 } func;
93 /* I/O stream */
/* Entry of the redirect-stream hash (see newfile()).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping and
 * is_pipe like a popen-vs-fopen marker; the code using them is not in this view. */
94 typedef struct rstream_s {
95 FILE *F;
96 char *buffer;
97 int adv;
98 int size;
99 int pos;
100 smallint is_pipe;
101 } rstream;
/* One bucket entry shared by all hash tables.
 * `data` must remain the first member: hash_search() returns &item->data and
 * hash_find() stores that pointer back into a hash_item pointer.
 * hash_find() allocates sizeof(hash_item) plus the name length including its NUL. */
103 typedef struct hash_item_s {
104 union {
105 struct var_s v; /* variable/array hash */
106 struct rstream_s rs; /* redirect streams hash */
107 struct func_s f; /* functions hash */
108 } data;
109 struct hash_item_s *next; /* next in chain */
110 char name[1]; /* really it's longer */
111 } hash_item;
/* Chained hash table. hash_find() calls hash_rebuild() once nel / csize exceeds 10;
 * hash_rebuild() takes the new size from PRIMES[nprime] and stops growing when
 * nprime reaches the end of PRIMES[]. glen counts name bytes including each NUL. */
113 typedef struct xhash_s {
114 unsigned nel; /* num of elements */
115 unsigned csize; /* current hash size */
116 unsigned nprime; /* next hash size in PRIMES[] */
117 unsigned glen; /* summary length of item names */
118 struct hash_item_s **items;
119 } xhash;
121 /* Tree node */
/* info   - operation class (OPCLSMASK bits) combined with OF_* flags, a
 *          priority byte and a low opcode, as laid out in tokeninfo[].
 * lineno - g_lineno at the time new_node() created the node.
 * l / r  - left and right operands; the live union member depends on the operation.
 * a      - link used by parse_expr() to walk back up from a node
 *          (it follows a.n while comparing priorities). */
122 typedef struct node_s {
123 uint32_t info;
124 unsigned lineno;
125 union {
126 struct node_s *n;
127 var *v;
128 int aidx;
129 char *new_progname;
130 regex_t *re;
131 } l;
132 union {
133 struct node_s *n;
134 regex_t *ire;
135 func *f;
136 } r;
137 union {
138 struct node_s *n;
139 } a;
140 } node;
142 /* Block of temporary variables */
/* size - capacity of nv[] in vars.
 * pos  - first unused slot; nvalloc() hands out vars from here, nvfree() moves it back.
 * prev/next - doubly linked list of blocks; g_cb points at the block in use. */
143 typedef struct nvblock_s {
144 int size;
145 var *pos;
146 struct nvblock_s *prev;
147 struct nvblock_s *next;
148 var nv[];
149 } nvblock;
/* A node together with storage for two compiled regexes.
 * NOTE(review): the two slots presumably match the `re` / `re + 1` pair that
 * mk_re_node() compiles (plain and REG_ICASE) — confirm at the call sites. */
151 typedef struct tsplitter_s {
152 node n;
153 regex_t re[2];
154 } tsplitter;
156 /* simple token classes */
/* Each simple class is one bit. next_token() starts at bit 0 and shifts left
 * once per NTC marker met in tokenlist[], so the bit order here has to follow
 * the order of the groups in tokenlist[]. */
157 /* Order and hex values are very important!!! See next_token() */
158 #define TC_SEQSTART 1 /* ( */
159 #define TC_SEQTERM (1 << 1) /* ) */
160 #define TC_REGEXP (1 << 2) /* /.../ */
161 #define TC_OUTRDR (1 << 3) /* | > >> */
162 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
163 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164 #define TC_BINOPX (1 << 6) /* two-opnd operator */
165 #define TC_IN (1 << 7)
166 #define TC_COMMA (1 << 8)
167 #define TC_PIPE (1 << 9) /* input redirection pipe */
168 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169 #define TC_ARRTERM (1 << 11) /* ] */
170 #define TC_GRPSTART (1 << 12) /* { */
171 #define TC_GRPTERM (1 << 13) /* } */
172 #define TC_SEMICOL (1 << 14)
173 #define TC_NEWLINE (1 << 15)
174 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175 #define TC_WHILE (1 << 17)
176 #define TC_ELSE (1 << 18)
177 #define TC_BUILTIN (1 << 19)
178 #define TC_GETLINE (1 << 20)
179 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
180 #define TC_BEGIN (1 << 22)
181 #define TC_END (1 << 23)
182 #define TC_EOF (1 << 24)
183 #define TC_VARIABLE (1 << 25)
184 #define TC_ARRAY (1 << 26)
185 #define TC_FUNCTION (1 << 27)
186 #define TC_STRING (1 << 28)
187 #define TC_NUMBER (1 << 29)
/* The classes from TC_EOF upwards have no tokenlist[] group; next_token()
 * assigns them directly (end of text, names, string and number literals). */
189 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
191 /* combined token classes */
192 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
/* next_token() always tries TC_WORD classes, even when they are not in `expected`. */
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
204 /* discard newlines after these */
205 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
208 /* what can expression begin with */
209 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
/* Operand-handling flags kept in bits 16..21 of node::info.
 * NOTE(review): going by the names and the comment on the priority byte,
 * RES1/RES2 presumably ask for the left/right operand to be resolved and
 * STR/NUM for it to be fetched as string/number — the evaluator is not in this view. */
219 #define OF_RES1 0x010000
220 #define OF_RES2 0x020000
221 #define OF_STR1 0x040000
222 #define OF_STR2 0x080000
223 #define OF_NUM1 0x100000
224 #define OF_CHECKED 0x200000
226 /* combined operator flags */
/* Two-letter shorthands used in tokeninfo[]: first letter describes the left
 * operand, second the right one (x = no flag). */
227 #define xx 0
228 #define xV OF_RES2
229 #define xS (OF_RES2 | OF_STR2)
230 #define Vx OF_RES1
231 #define VV (OF_RES1 | OF_RES2)
232 #define Nx (OF_RES1 | OF_NUM1)
233 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx (OF_RES1 | OF_STR1)
235 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
/* OPCLSMASK selects the operation class (OC_* / ST_* values); OPNMASK the low opcode bits. */
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK 0x007F
241 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
242 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
243 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
244 */
/*
 * P(x) places a priority (or, for builtins, the argument-descriptor byte)
 * into bits 24..31 of an info word.
 *
 * The argument is parenthesized so that expressions such as P(a | b) expand
 * correctly, and it is converted to uint32_t before shifting: values with
 * bit 7 set (e.g. P(0x83)) would otherwise shift a signed int into its sign
 * bit, which is undefined behaviour in C.
 *
 * PRIMASK selects the 7 priority bits; PRIMASK2 is the same mask without the
 * lowest bit, i.e. without the even/odd grouping bit.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
252 /* Operation classes */
/* All OC_* and ST_* values live in bits 8..15 and are extracted with OPCLSMASK. */
/* NOTE(review): the two thresholds below equal OC_WALKINIT and OC_BINARY;
 * presumably the evaluator uses them to split the classes into ranges — confirm there. */
254 #define SHIFT_TIL_THIS 0x0600
255 #define RECUR_FROM_THIS 0x1000
257 enum {
258 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
259 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
261 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
262 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
263 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
265 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
266 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
267 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
268 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
269 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
270 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
271 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
272 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
273 OC_DONE = 0x2800,
/* Statement markers: they appear in tokeninfo[] for if/do/for/while. */
275 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
276 ST_WHILE = 0x3300
279 /* simple builtins */
/* F_* values are OR-ed into OC_FBLTIN entries of tokeninfo[] as the low opcode. */
280 enum {
281 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
282 F_ti, F_le, F_sy, F_ff, F_cl
285 /* builtins */
/* B_* values are OR-ed into OC_BUILTIN (OC_B) entries of tokeninfo[] as the low opcode. */
286 enum {
287 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
288 B_ge, B_gs, B_su,
289 B_an, B_co, B_ls, B_or, B_rs, B_xo,
292 /* tokens and their corresponding info values */
294 #define NTC "\377" /* switch to next token class (tc<<1) */
295 #define NTCC '\377'
297 #define OC_B OC_BUILTIN
/* Token spellings scanned by next_token().
 * Each entry is one length byte (written as an octal escape, e.g. "\10" = 8)
 * followed by that many characters of token text. An NTC byte ends the current
 * class and moves on to the next TC_* bit. Entries are tried in order and the
 * first match wins, so a longer spelling has to come before its own prefix
 * (">>" before ">", "**=" before "**").
 * The n-th entry here corresponds to the n-th element of tokeninfo[]. */
299 static const char tokenlist[] ALIGN1 =
300 "\1(" NTC
301 "\1)" NTC
302 "\1/" NTC /* REGEXP */
303 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
304 "\2++" "\2--" NTC /* UOPPOST */
305 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
313 "\2in" NTC
314 "\1," NTC
315 "\1|" NTC
316 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
317 "\1]" NTC
318 "\1{" NTC
319 "\1}" NTC
320 "\1;" NTC
321 "\1\n" NTC
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
326 "\5while" NTC
327 "\4else" NTC
329 "\3and" "\5compl" "\6lshift" "\2or"
330 "\6rshift" "\3xor"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
338 "\7getline" NTC
339 "\4func" "\10function" NTC
340 "\5BEGIN" NTC
341 "\3END"
342 /* compiler adds trailing "\0" */
/* Info word for every tokenlist[] entry, in the same order: next_token()
 * advances one element here for each spelling it skips there.
 * A word combines an OC_* or ST_* class, OF_* operand flags (xx..SS), a P()
 * byte and a low opcode (a character, or an F_* / B_* value).
 * NOTE(review): this copy holds fewer elements than tokenlist[] has entries
 * (for instance nothing precedes OC_REGEXP for "(" and ")", and the closing
 * brace of the initializer is absent) — lines appear to be missing from this
 * listing; check it against a pristine copy before relying on it. */
345 static const uint32_t tokeninfo[] = {
348 OC_REGEXP,
349 xS|'a', xS|'w', xS|'|',
350 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
351 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
352 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
356 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
357 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
358 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359 OC_IN|SV|P(49), /* in */
360 OC_COMMA|SS|P(80),
361 OC_PGETLINE|SV|P(37),
362 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
363 0, /* ] */
367 0, /* \n */
368 ST_IF, ST_DO, ST_FOR, OC_BREAK,
369 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
370 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
371 OC_RETURN|Vx, OC_EXIT|Nx,
372 ST_WHILE,
373 0, /* else */
375 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
382 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384 OC_GETLINE|SV|P(0),
385 0, 0,
387 0 /* END */
390 /* internal variable names and their initial values */
391 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indexes into intvar[]; the order matches the names in vNames[]. */
392 enum {
393 CONVFMT, OFMT, FS, OFS,
394 ORS, RS, RT, FILENAME,
395 SUBSEP, F0, ARGIND, ARGC,
396 ARGV, ERRNO, FNR, NR,
397 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
/* NUL-separated list of names, closed by an empty string.
 * NOTE(review): a '*' written right after a name's NUL presumably flags that
 * name as VF_SPECIAL; the loop that consumes this table is not in this view. */
400 static const char vNames[] ALIGN1 =
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
/* NUL-separated initial string values, in the same order as vNames[].
 * NOTE(review): the "\377" byte presumably marks where the string defaults end — confirm. */
407 static const char vValues[] ALIGN1 =
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
412 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild() then
 * steps through PRIMES[] via xhash::nprime and refuses to grow past the last one. */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
417 /* Globals. Split in two parts so that the first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
421 */
/* First half of the global state, reached through G1 (ptr_to_globals[-1]).
 * The accessor macros defined further down (t_double, g_pos, ...) expand to
 * members of this struct. */
422 struct globals {
423 double t_double;
424 chain beginseq, mainseq, endseq;
425 chain *seq;
426 node *break_ptr, *continue_ptr;
427 rstream *iF;
428 xhash *vhash, *ahash, *fdhash, *fnhash;
429 const char *g_progname;
430 int g_lineno;
431 int nfields;
432 int maxfields; /* used in fsrealloc() only */
433 var *Fields;
434 nvblock *g_cb;
435 char *g_pos;
436 char *g_buf;
437 smallint icase;
438 smallint exiting;
439 smallint nextrec;
440 smallint nextfile;
441 smallint is_f0_split;
442 smallint t_rollback;
/* Second half of the global state, reached through G (*ptr_to_globals).
 * The t_* members describe the token most recently produced by next_token();
 * members named func__name hold state that is private to that function. */
444 struct globals2 {
445 uint32_t t_info; /* often used */
446 uint32_t t_tclass;
447 char *t_string;
448 int t_lineno;
450 var *intvar[NUM_INTERNAL_VARS]; /* often used */
452 /* former statics from various functions */
453 char *split_f0__fstrings;
455 uint32_t next_token__save_tclass;
456 uint32_t next_token__save_info;
457 uint32_t next_token__ltclass;
458 smallint next_token__concat_inserted;
460 smallint next_input_file__files_happen;
461 rstream next_input_file__rsm;
463 var *evaluate__fnargs;
464 unsigned evaluate__seed;
465 regex_t evaluate__sreg;
467 var ptest__v;
469 tsplitter exec_builtin__tspl;
471 /* biggest and least used members go last */
472 tsplitter fsplitter, rsplitter;
/* INIT_G() allocates both structs in one zeroed block and points
 * ptr_to_globals just past `struct globals`: G1 is therefore the element
 * right before the pointer and G the object it points at. */
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names for the globals. Because these are macros, none of these
 * identifiers may be reused as a local variable name in this file. */
481 #define t_double (G1.t_double )
482 #define beginseq (G1.beginseq )
483 #define mainseq (G1.mainseq )
484 #define endseq (G1.endseq )
485 #define seq (G1.seq )
486 #define break_ptr (G1.break_ptr )
487 #define continue_ptr (G1.continue_ptr)
488 #define iF (G1.iF )
489 #define vhash (G1.vhash )
490 #define ahash (G1.ahash )
491 #define fdhash (G1.fdhash )
492 #define fnhash (G1.fnhash )
493 #define g_progname (G1.g_progname )
494 #define g_lineno (G1.g_lineno )
495 #define nfields (G1.nfields )
496 #define maxfields (G1.maxfields )
497 #define Fields (G1.Fields )
498 #define g_cb (G1.g_cb )
499 #define g_pos (G1.g_pos )
500 #define g_buf (G1.g_buf )
501 #define icase (G1.icase )
502 #define exiting (G1.exiting )
503 #define nextrec (G1.nextrec )
504 #define nextfile (G1.nextfile )
505 #define is_f0_split (G1.is_f0_split )
506 #define t_rollback (G1.t_rollback )
507 #define t_info (G.t_info )
508 #define t_tclass (G.t_tclass )
509 #define t_string (G.t_string )
510 #define t_lineno (G.t_lineno )
511 #define intvar (G.intvar )
512 #define fsplitter (G.fsplitter )
513 #define rsplitter (G.rsplitter )
/* Everything starts zeroed (xzalloc); only the two members below need a non-zero start. */
514 #define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
518 } while (0)
521 /* function prototypes */
/* Forward declarations for functions that are called before their definition
 * (e.g. handle_special() from the setvar_*() helpers, fmt_num() from getvar_s()). */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
530 /* ---- error handling ---- */
/* Message texts passed to syntax_error(), which prefixes them with
 * the program name and line number. */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
543 static void zero_out_var(var *vp)
545 memset(vp, 0, sizeof(*vp));
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
551 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
554 /* ---- hash stuff ---- */
/*
 * Hash a NUL-terminated name.
 * Every character is folded in as h = ch + h * 63 (written as (h << 6) - h),
 * with unsigned wrap-around. Callers reduce the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = *name + (h << 6) - h;
	return h;
}
565 /* create new hash */
566 static xhash *hash_init(void)
568 xhash *newhash;
570 newhash = xzalloc(sizeof(*newhash));
571 newhash->csize = FIRST_PRIME;
572 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
574 return newhash;
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
580 hash_item *hi;
582 hi = hash->items[hashidx(name) % hash->csize];
583 while (hi) {
584 if (strcmp(hi->name, name) == 0)
585 return &hi->data;
586 hi = hi->next;
588 return NULL;
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
594 unsigned newsize, i, idx;
595 hash_item **newitems, *hi, *thi;
597 if (hash->nprime == ARRAY_SIZE(PRIMES))
598 return;
600 newsize = PRIMES[hash->nprime++];
601 newitems = xzalloc(newsize * sizeof(newitems[0]));
603 for (i = 0; i < hash->csize; i++) {
604 hi = hash->items[i];
605 while (hi) {
606 thi = hi;
607 hi = thi->next;
608 idx = hashidx(thi->name) % newsize;
609 thi->next = newitems[idx];
610 newitems[idx] = thi;
614 free(hash->items);
615 hash->csize = newsize;
616 hash->items = newitems;
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
622 hash_item *hi;
623 unsigned idx;
624 int l;
626 hi = hash_search(hash, name);
627 if (!hi) {
628 if (++hash->nel / hash->csize > 10)
629 hash_rebuild(hash);
631 l = strlen(name) + 1;
632 hi = xzalloc(sizeof(*hi) + l);
633 strcpy(hi->name, name);
635 idx = hashidx(name) % hash->csize;
636 hi->next = hash->items[idx];
637 hash->items[idx] = hi;
638 hash->glen += l;
640 return &hi->data;
/* Typed find-or-create wrappers around hash_find():
 * findvar looks in the given table; newvar, newfile and newfunc use the
 * global variable, stream and function tables respectively. */
643 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
644 #define newvar(name) ((var*) hash_find(vhash, (name)))
645 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
648 static void hash_remove(xhash *hash, const char *name)
650 hash_item *hi, **phi;
652 phi = &hash->items[hashidx(name) % hash->csize];
653 while (*phi) {
654 hi = *phi;
655 if (strcmp(hi->name, name) == 0) {
656 hash->glen -= (strlen(name) + 1);
657 hash->nel--;
658 *phi = hi->next;
659 free(hi);
660 break;
662 phi = &hi->next;
666 /* ------ some useful functions ------ */
668 static char *skip_spaces(char *p)
670 while (1) {
671 if (*p == '\\' && p[1] == '\n') {
672 p++;
673 t_lineno++;
674 } else if (*p != ' ' && *p != '\t') {
675 break;
677 p++;
679 return p;
/*
 * Step through a list of NUL-separated words.
 * Returns the word *s currently points at and moves *s to the character
 * right after that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
/*
 * Read one, possibly backslash-escaped, character from *s and advance *s.
 *
 * A plain character is returned unchanged. After a backslash the escape is
 * decoded by bb_process_escape_sequence(). If that call returns a backslash
 * without consuming anything, the escape was not recognized: the character
 * following the backslash is returned instead and skipped unless it is the
 * terminating NUL.
 */
static char nextchar(char **s)
{
	char *after;
	char ch;

	ch = **s;
	*s += 1;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch != '\\' || *s != after)
		return ch;

	/* unrecognized \z: yield z itself */
	ch = **s;
	if (ch != '\0')
		*s += 1;
	return ch;
}
707 static ALWAYS_INLINE int isalnum_(int c)
709 return (isalnum(c) || c == '_');
712 static double my_strtod(char **pp)
714 char *cp = *pp;
715 if (ENABLE_DESKTOP && cp[0] == '0') {
716 /* Might be hex or octal integer: 0x123abc or 07777 */
717 char c = (cp[1] | 0x20);
718 if (c == 'x' || isdigit(cp[1])) {
719 unsigned long long ull = strtoull(cp, pp, 0);
720 if (c == 'x')
721 return ull;
722 c = **pp;
723 if (!isdigit(c) && c != '.')
724 return ull;
725 /* else: it may be a floating number. Examples:
726 * 009.123 (*pp points to '9')
727 * 000.123 (*pp points to '.')
728 * fall through to strtod.
732 return strtod(cp, pp);
735 /* -------- working with variables (set/get/copy/etc) -------- */
737 static xhash *iamarray(var *v)
739 var *a = v;
741 while (a->type & VF_CHILD)
742 a = a->x.parent;
744 if (!(a->type & VF_ARRAY)) {
745 a->type |= VF_ARRAY;
746 a->x.array = hash_init();
748 return a->x.array;
751 static void clear_array(xhash *array)
753 unsigned i;
754 hash_item *hi, *thi;
756 for (i = 0; i < array->csize; i++) {
757 hi = array->items[i];
758 while (hi) {
759 thi = hi;
760 hi = hi->next;
761 free(thi->data.v.string);
762 free(thi);
764 array->items[i] = NULL;
766 array->glen = array->nel = 0;
769 /* clear a variable */
770 static var *clrvar(var *v)
772 if (!(v->type & VF_FSTR))
773 free(v->string);
775 v->type &= VF_DONTTOUCH;
776 v->type |= VF_DIRTY;
777 v->string = NULL;
778 return v;
781 /* assign string value to variable */
782 static var *setvar_p(var *v, char *value)
784 clrvar(v);
785 v->string = value;
786 handle_special(v);
787 return v;
790 /* same as setvar_p but make a copy of string */
791 static var *setvar_s(var *v, const char *value)
793 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
796 /* same as setvar_s but sets USER flag */
797 static var *setvar_u(var *v, const char *value)
799 v = setvar_s(v, value);
800 v->type |= VF_USER;
801 return v;
804 /* set array element to user string */
805 static void setari_u(var *a, int idx, const char *s)
807 var *v;
809 v = findvar(iamarray(a), itoa(idx));
810 setvar_u(v, s);
813 /* assign numeric value to variable */
814 static var *setvar_i(var *v, double value)
816 clrvar(v);
817 v->type |= VF_NUMBER;
818 v->number = value;
819 handle_special(v);
820 return v;
823 static const char *getvar_s(var *v)
825 /* if v is numeric and has no cached string, convert it to string */
826 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
827 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
828 v->string = xstrdup(g_buf);
829 v->type |= VF_CACHED;
831 return (v->string == NULL) ? "" : v->string;
834 static double getvar_i(var *v)
836 char *s;
838 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
839 v->number = 0;
840 s = v->string;
841 if (s && *s) {
842 debug_printf_eval("getvar_i: '%s'->", s);
843 v->number = my_strtod(&s);
844 debug_printf_eval("%f (s:'%s')\n", v->number, s);
845 if (v->type & VF_USER) {
846 s = skip_spaces(s);
847 if (*s != '\0')
848 v->type &= ~VF_USER;
850 } else {
851 debug_printf_eval("getvar_i: '%s'->zero\n", s);
852 v->type &= ~VF_USER;
854 v->type |= VF_CACHED;
856 debug_printf_eval("getvar_i: %f\n", v->number);
857 return v->number;
860 /* Used for operands of bitwise ops */
861 static unsigned long getvar_i_int(var *v)
863 double d = getvar_i(v);
865 /* Casting doubles to longs is undefined for values outside
866 * of target type range. Try to widen it as much as possible */
867 if (d >= 0)
868 return (unsigned long)d;
869 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
870 return - (long) (unsigned long) (-d);
873 static var *copyvar(var *dest, const var *src)
875 if (dest != src) {
876 clrvar(dest);
877 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
878 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
879 dest->number = src->number;
880 if (src->string)
881 dest->string = xstrdup(src->string);
883 handle_special(dest);
884 return dest;
887 static var *incvar(var *v)
889 return setvar_i(v, getvar_i(v) + 1.0);
892 /* return true if v is number or numeric string */
893 static int is_numeric(var *v)
895 getvar_i(v);
896 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
899 /* return 1 when value of v corresponds to true, 0 otherwise */
900 static int istrue(var *v)
902 if (is_numeric(v))
903 return (v->number != 0);
904 return (v->string && v->string[0]);
907 /* temporary variables allocator. Last allocated should be first freed */
908 static var *nvalloc(int n)
910 nvblock *pb = NULL;
911 var *v, *r;
912 int size;
914 while (g_cb) {
915 pb = g_cb;
916 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
917 break;
918 g_cb = g_cb->next;
921 if (!g_cb) {
922 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
923 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
924 g_cb->size = size;
925 g_cb->pos = g_cb->nv;
926 g_cb->prev = pb;
927 /*g_cb->next = NULL; - xzalloc did it */
928 if (pb)
929 pb->next = g_cb;
932 v = r = g_cb->pos;
933 g_cb->pos += n;
935 while (v < g_cb->pos) {
936 v->type = 0;
937 v->string = NULL;
938 v++;
941 return r;
944 static void nvfree(var *v)
946 var *p;
948 if (v < g_cb->nv || v >= g_cb->pos)
949 syntax_error(EMSG_INTERNAL_ERROR);
951 for (p = v; p < g_cb->pos; p++) {
952 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
953 clear_array(iamarray(p));
954 free(p->x.array->items);
955 free(p->x.array);
957 if (p->type & VF_WALK) {
958 walker_list *n;
959 walker_list *w = p->x.walker;
960 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
961 p->x.walker = NULL;
962 while (w) {
963 n = w->prev;
964 debug_printf_walker(" free(%p)\n", w);
965 free(w);
966 w = n;
969 clrvar(p);
972 g_cb->pos = v;
973 while (g_cb->prev && g_cb->pos == g_cb->nv) {
974 g_cb = g_cb->prev;
978 /* ------- awk program text parsing ------- */
980 /* Parse next token pointed by global pos, place results into global ttt.
981 * If token isn't expected, give away. Return token class
983 static uint32_t next_token(uint32_t expected)
985 #define concat_inserted (G.next_token__concat_inserted)
986 #define save_tclass (G.next_token__save_tclass)
987 #define save_info (G.next_token__save_info)
988 /* Initialized to TC_OPTERM: */
989 #define ltclass (G.next_token__ltclass)
991 char *p, *s;
992 const char *tl;
993 uint32_t tc;
994 const uint32_t *ti;
996 if (t_rollback) {
997 t_rollback = FALSE;
999 } else if (concat_inserted) {
1000 concat_inserted = FALSE;
1001 t_tclass = save_tclass;
1002 t_info = save_info;
1004 } else {
1005 p = g_pos;
1006 readnext:
1007 p = skip_spaces(p);
1008 g_lineno = t_lineno;
1009 if (*p == '#')
1010 while (*p != '\n' && *p != '\0')
1011 p++;
1013 if (*p == '\n')
1014 t_lineno++;
1016 if (*p == '\0') {
1017 tc = TC_EOF;
1018 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1020 } else if (*p == '\"') {
1021 /* it's a string */
1022 t_string = s = ++p;
1023 while (*p != '\"') {
1024 char *pp;
1025 if (*p == '\0' || *p == '\n')
1026 syntax_error(EMSG_UNEXP_EOS);
1027 pp = p;
1028 *s++ = nextchar(&pp);
1029 p = pp;
1031 p++;
1032 *s = '\0';
1033 tc = TC_STRING;
1034 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1036 } else if ((expected & TC_REGEXP) && *p == '/') {
1037 /* it's regexp */
1038 t_string = s = ++p;
1039 while (*p != '/') {
1040 if (*p == '\0' || *p == '\n')
1041 syntax_error(EMSG_UNEXP_EOS);
1042 *s = *p++;
1043 if (*s++ == '\\') {
1044 char *pp = p;
1045 s[-1] = bb_process_escape_sequence((const char **)&pp);
1046 if (*p == '\\')
1047 *s++ = '\\';
1048 if (pp == p)
1049 *s++ = *p++;
1050 else
1051 p = pp;
1054 p++;
1055 *s = '\0';
1056 tc = TC_REGEXP;
1057 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1059 } else if (*p == '.' || isdigit(*p)) {
1060 /* it's a number */
1061 char *pp = p;
1062 t_double = my_strtod(&pp);
1063 p = pp;
1064 if (*p == '.')
1065 syntax_error(EMSG_UNEXP_TOKEN);
1066 tc = TC_NUMBER;
1067 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1069 } else {
1070 /* search for something known */
1071 tl = tokenlist;
1072 tc = 0x00000001;
1073 ti = tokeninfo;
1074 while (*tl) {
1075 int l = (unsigned char) *tl++;
1076 if (l == (unsigned char) NTCC) {
1077 tc <<= 1;
1078 continue;
1080 /* if token class is expected,
1081 * token matches,
1082 * and it's not a longer word,
1084 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1085 && strncmp(p, tl, l) == 0
1086 && !((tc & TC_WORD) && isalnum_(p[l]))
1088 /* then this is what we are looking for */
1089 t_info = *ti;
1090 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1091 p += l;
1092 goto token_found;
1094 ti++;
1095 tl += l;
1097 /* not a known token */
1099 /* is it a name? (var/array/function) */
1100 if (!isalnum_(*p))
1101 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1102 /* yes */
1103 t_string = --p;
1104 while (isalnum_(*++p)) {
1105 p[-1] = *p;
1107 p[-1] = '\0';
1108 tc = TC_VARIABLE;
1109 /* also consume whitespace between functionname and bracket */
1110 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1111 p = skip_spaces(p);
1112 if (*p == '(') {
1113 tc = TC_FUNCTION;
1114 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1115 } else {
1116 if (*p == '[') {
1117 p++;
1118 tc = TC_ARRAY;
1119 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1120 } else
1121 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1124 token_found:
1125 g_pos = p;
1127 /* skipping newlines in some cases */
1128 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1129 goto readnext;
1131 /* insert concatenation operator when needed */
1132 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1133 concat_inserted = TRUE;
1134 save_tclass = tc;
1135 save_info = t_info;
1136 tc = TC_BINOP;
1137 t_info = OC_CONCAT | SS | P(35);
1140 t_tclass = tc;
1142 ltclass = t_tclass;
1144 /* Are we ready for this? */
1145 if (!(ltclass & expected))
1146 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1147 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1149 return ltclass;
1150 #undef concat_inserted
1151 #undef save_tclass
1152 #undef save_info
1153 #undef ltclass
1156 static void rollback_token(void)
1158 t_rollback = TRUE;
1161 static node *new_node(uint32_t info)
1163 node *n;
1165 n = xzalloc(sizeof(node));
1166 n->info = info;
1167 n->lineno = g_lineno;
1168 return n;
1171 static void mk_re_node(const char *s, node *n, regex_t *re)
1173 n->info = OC_REGEXP;
1174 n->l.re = re;
1175 n->r.ire = re + 1;
1176 xregcomp(re, s, REG_EXTENDED);
1177 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1180 static node *condition(void)
1182 next_token(TC_SEQSTART);
1183 return parse_expr(TC_SEQTERM);
1186 /* parse expression terminated by given argument, return ptr
1187 * to built subtree. Terminator is eaten by parse_expr */
1188 static node *parse_expr(uint32_t iexp)
1190 node sn;
1191 node *cn = &sn;
1192 node *vn, *glptr;
1193 uint32_t tc, xtc;
1194 var *v;
1196 debug_printf_parse("%s(%x)\n", __func__, iexp);
1198 sn.info = PRIMASK;
1199 sn.r.n = glptr = NULL;
1200 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1202 while (!((tc = next_token(xtc)) & iexp)) {
1204 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1205 /* input redirection (<) attached to glptr node */
1206 debug_printf_parse("%s: input redir\n", __func__);
1207 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1208 cn->a.n = glptr;
1209 xtc = TC_OPERAND | TC_UOPPRE;
1210 glptr = NULL;
1212 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1213 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1214 /* for binary and postfix-unary operators, jump back over
1215 * previous operators with higher priority */
1216 vn = cn;
1217 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1218 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1220 vn = vn->a.n;
1222 if ((t_info & OPCLSMASK) == OC_TERNARY)
1223 t_info += P(6);
1224 cn = vn->a.n->r.n = new_node(t_info);
1225 cn->a.n = vn->a.n;
1226 if (tc & TC_BINOP) {
1227 cn->l.n = vn;
1228 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1229 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1230 /* it's a pipe */
1231 next_token(TC_GETLINE);
1232 /* give maximum priority to this pipe */
1233 cn->info &= ~PRIMASK;
1234 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1236 } else {
1237 cn->r.n = vn;
1238 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1240 vn->a.n = cn;
1242 } else {
1243 debug_printf_parse("%s: other\n", __func__);
1244 /* for operands and prefix-unary operators, attach them
1245 * to last node */
1246 vn = cn;
1247 cn = vn->r.n = new_node(t_info);
1248 cn->a.n = vn;
1249 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1250 if (tc & (TC_OPERAND | TC_REGEXP)) {
1251 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1252 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1253 /* one should be very careful with switch on tclass -
1254 * only simple tclasses should be used! */
1255 switch (tc) {
1256 case TC_VARIABLE:
1257 case TC_ARRAY:
1258 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1259 cn->info = OC_VAR;
1260 v = hash_search(ahash, t_string);
1261 if (v != NULL) {
1262 cn->info = OC_FNARG;
1263 cn->l.aidx = v->x.aidx;
1264 } else {
1265 cn->l.v = newvar(t_string);
1267 if (tc & TC_ARRAY) {
1268 cn->info |= xS;
1269 cn->r.n = parse_expr(TC_ARRTERM);
1271 break;
1273 case TC_NUMBER:
1274 case TC_STRING:
1275 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1276 cn->info = OC_VAR;
1277 v = cn->l.v = xzalloc(sizeof(var));
1278 if (tc & TC_NUMBER)
1279 setvar_i(v, t_double);
1280 else
1281 setvar_s(v, t_string);
1282 break;
1284 case TC_REGEXP:
1285 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1286 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1287 break;
1289 case TC_FUNCTION:
1290 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1291 cn->info = OC_FUNC;
1292 cn->r.f = newfunc(t_string);
1293 cn->l.n = condition();
1294 break;
1296 case TC_SEQSTART:
1297 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1298 cn = vn->r.n = parse_expr(TC_SEQTERM);
1299 if (!cn)
1300 syntax_error("Empty sequence");
1301 cn->a.n = vn;
1302 break;
1304 case TC_GETLINE:
1305 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1306 glptr = cn;
1307 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308 break;
1310 case TC_BUILTIN:
1311 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1312 cn->l.n = condition();
1313 break;
1319 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1320 return sn.r.n;
1323 /* add node to chain. Return ptr to alloc'd node */
1324 static node *chain_node(uint32_t info)
1326 node *n;
1328 if (!seq->first)
1329 seq->first = seq->last = new_node(0);
1331 if (seq->programname != g_progname) {
1332 seq->programname = g_progname;
1333 n = chain_node(OC_NEWSOURCE);
1334 n->l.new_progname = xstrdup(g_progname);
1337 n = seq->last;
1338 n->info = info;
1339 seq->last = n->a.n = new_node(OC_DONE);
1341 return n;
1344 static void chain_expr(uint32_t info)
1346 node *n;
1348 n = chain_node(info);
1349 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1350 if (t_tclass & TC_GRPTERM)
1351 rollback_token();
1354 static node *chain_loop(node *nn)
1356 node *n, *n2, *save_brk, *save_cont;
1358 save_brk = break_ptr;
1359 save_cont = continue_ptr;
1361 n = chain_node(OC_BR | Vx);
1362 continue_ptr = new_node(OC_EXEC);
1363 break_ptr = new_node(OC_EXEC);
1364 chain_group();
1365 n2 = chain_node(OC_EXEC | Vx);
1366 n2->l.n = nn;
1367 n2->a.n = n;
1368 continue_ptr->a.n = n2;
1369 break_ptr->a.n = n->r.n = seq->last;
1371 continue_ptr = save_cont;
1372 break_ptr = save_brk;
1374 return n;
1377 /* parse group and attach it to chain */
1378 static void chain_group(void)
1380 uint32_t c;
1381 node *n, *n2, *n3;
1383 do {
1384 c = next_token(TC_GRPSEQ);
1385 } while (c & TC_NEWLINE);
1387 if (c & TC_GRPSTART) {
1388 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1389 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1390 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1391 if (t_tclass & TC_NEWLINE)
1392 continue;
1393 rollback_token();
1394 chain_group();
1396 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1397 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1398 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1399 rollback_token();
1400 chain_expr(OC_EXEC | Vx);
1401 } else {
1402 /* TC_STATEMNT */
1403 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1404 switch (t_info & OPCLSMASK) {
1405 case ST_IF:
1406 debug_printf_parse("%s: ST_IF\n", __func__);
1407 n = chain_node(OC_BR | Vx);
1408 n->l.n = condition();
1409 chain_group();
1410 n2 = chain_node(OC_EXEC);
1411 n->r.n = seq->last;
1412 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1413 chain_group();
1414 n2->a.n = seq->last;
1415 } else {
1416 rollback_token();
1418 break;
1420 case ST_WHILE:
1421 debug_printf_parse("%s: ST_WHILE\n", __func__);
1422 n2 = condition();
1423 n = chain_loop(NULL);
1424 n->l.n = n2;
1425 break;
1427 case ST_DO:
1428 debug_printf_parse("%s: ST_DO\n", __func__);
1429 n2 = chain_node(OC_EXEC);
1430 n = chain_loop(NULL);
1431 n2->a.n = n->a.n;
1432 next_token(TC_WHILE);
1433 n->l.n = condition();
1434 break;
1436 case ST_FOR:
1437 debug_printf_parse("%s: ST_FOR\n", __func__);
1438 next_token(TC_SEQSTART);
1439 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1440 if (t_tclass & TC_SEQTERM) { /* for-in */
1441 if ((n2->info & OPCLSMASK) != OC_IN)
1442 syntax_error(EMSG_UNEXP_TOKEN);
1443 n = chain_node(OC_WALKINIT | VV);
1444 n->l.n = n2->l.n;
1445 n->r.n = n2->r.n;
1446 n = chain_loop(NULL);
1447 n->info = OC_WALKNEXT | Vx;
1448 n->l.n = n2->l.n;
1449 } else { /* for (;;) */
1450 n = chain_node(OC_EXEC | Vx);
1451 n->l.n = n2;
1452 n2 = parse_expr(TC_SEMICOL);
1453 n3 = parse_expr(TC_SEQTERM);
1454 n = chain_loop(n3);
1455 n->l.n = n2;
1456 if (!n2)
1457 n->info = OC_EXEC;
1459 break;
1461 case OC_PRINT:
1462 case OC_PRINTF:
1463 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1464 n = chain_node(t_info);
1465 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1466 if (t_tclass & TC_OUTRDR) {
1467 n->info |= t_info;
1468 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1470 if (t_tclass & TC_GRPTERM)
1471 rollback_token();
1472 break;
1474 case OC_BREAK:
1475 debug_printf_parse("%s: OC_BREAK\n", __func__);
1476 n = chain_node(OC_EXEC);
1477 n->a.n = break_ptr;
1478 break;
1480 case OC_CONTINUE:
1481 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1482 n = chain_node(OC_EXEC);
1483 n->a.n = continue_ptr;
1484 break;
1486 /* delete, next, nextfile, return, exit */
1487 default:
1488 debug_printf_parse("%s: default\n", __func__);
1489 chain_expr(t_info);
1494 static void parse_program(char *p)
1496 uint32_t tclass;
1497 node *cn;
1498 func *f;
1499 var *v;
1501 g_pos = p;
1502 t_lineno = 1;
1503 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1504 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1506 if (tclass & TC_OPTERM) {
1507 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1508 continue;
1511 seq = &mainseq;
1512 if (tclass & TC_BEGIN) {
1513 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1514 seq = &beginseq;
1515 chain_group();
1517 } else if (tclass & TC_END) {
1518 debug_printf_parse("%s: TC_END\n", __func__);
1519 seq = &endseq;
1520 chain_group();
1522 } else if (tclass & TC_FUNCDECL) {
1523 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1524 next_token(TC_FUNCTION);
1525 g_pos++;
1526 f = newfunc(t_string);
1527 f->body.first = NULL;
1528 f->nargs = 0;
1529 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1530 v = findvar(ahash, t_string);
1531 v->x.aidx = f->nargs++;
1533 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1534 break;
1536 seq = &f->body;
1537 chain_group();
1538 clear_array(ahash);
1540 } else if (tclass & TC_OPSEQ) {
1541 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1542 rollback_token();
1543 cn = chain_node(OC_TEST);
1544 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1545 if (t_tclass & TC_GRPSTART) {
1546 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1547 rollback_token();
1548 chain_group();
1549 } else {
1550 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1551 chain_node(OC_PRINT);
1553 cn->r.n = mainseq.last;
1555 } else /* if (tclass & TC_GRPSTART) */ {
1556 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1557 rollback_token();
1558 chain_group();
1561 debug_printf_parse("%s: TC_EOF\n", __func__);
1565 /* -------- program execution part -------- */
1567 static node *mk_splitter(const char *s, tsplitter *spl)
1569 regex_t *re, *ire;
1570 node *n;
1572 re = &spl->re[0];
1573 ire = &spl->re[1];
1574 n = &spl->n;
1575 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1576 regfree(re);
1577 regfree(ire); // TODO: nuke ire, use re+1?
1579 if (s[0] && s[1]) { /* strlen(s) > 1 */
1580 mk_re_node(s, n, re);
1581 } else {
1582 n->info = (uint32_t) s[0];
1585 return n;
1588 /* use node as a regular expression. Supplied with node ptr and regex_t
1589 * storage space. Return ptr to regex (if result points to preg, it should
1590 * be later regfree'd manually
1592 static regex_t *as_regex(node *op, regex_t *preg)
1594 int cflags;
1595 var *v;
1596 const char *s;
1598 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1599 return icase ? op->r.ire : op->l.re;
1601 v = nvalloc(1);
1602 s = getvar_s(evaluate(op, v));
1604 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1605 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1606 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1607 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1608 * (maybe gsub is not supposed to use REG_EXTENDED?).
1610 if (regcomp(preg, s, cflags)) {
1611 cflags &= ~REG_EXTENDED;
1612 xregcomp(preg, s, cflags);
1614 nvfree(v);
1615 return preg;
/* Gradually growing buffer.
 * Returns b unchanged while n < *size; otherwise reallocates to
 * n + n/2 + 80 bytes and records the new size in *size.
 * Because reallocation also happens when n == *size, the buffer always
 * has at least one byte beyond n.
 * *size is not read when b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1631 /* resize field storage space */
1632 static void fsrealloc(int size)
1634 int i;
1636 if (size >= maxfields) {
1637 i = maxfields;
1638 maxfields = size + 16;
1639 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1640 for (; i < maxfields; i++) {
1641 Fields[i].type = VF_SPECIAL;
1642 Fields[i].string = NULL;
1645 /* if size < nfields, clear extra field variables */
1646 for (i = size; i < nfields; i++) {
1647 clrvar(Fields + i);
1649 nfields = size;
1652 static int awk_split(const char *s, node *spl, char **slist)
1654 int l, n;
1655 char c[4];
1656 char *s1;
1657 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1659 /* in worst case, each char would be a separate field */
1660 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1661 strcpy(s1, s);
1663 c[0] = c[1] = (char)spl->info;
1664 c[2] = c[3] = '\0';
1665 if (*getvar_s(intvar[RS]) == '\0')
1666 c[2] = '\n';
1668 n = 0;
1669 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1670 if (!*s)
1671 return n; /* "": zero fields */
1672 n++; /* at least one field will be there */
1673 do {
1674 l = strcspn(s, c+2); /* len till next NUL or \n */
1675 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1676 && pmatch[0].rm_so <= l
1678 l = pmatch[0].rm_so;
1679 if (pmatch[0].rm_eo == 0) {
1680 l++;
1681 pmatch[0].rm_eo++;
1683 n++; /* we saw yet another delimiter */
1684 } else {
1685 pmatch[0].rm_eo = l;
1686 if (s[l])
1687 pmatch[0].rm_eo++;
1689 memcpy(s1, s, l);
1690 /* make sure we remove *all* of the separator chars */
1691 do {
1692 s1[l] = '\0';
1693 } while (++l < pmatch[0].rm_eo);
1694 nextword(&s1);
1695 s += pmatch[0].rm_eo;
1696 } while (*s);
1697 return n;
1699 if (c[0] == '\0') { /* null split */
1700 while (*s) {
1701 *s1++ = *s++;
1702 *s1++ = '\0';
1703 n++;
1705 return n;
1707 if (c[0] != ' ') { /* single-character split */
1708 if (icase) {
1709 c[0] = toupper(c[0]);
1710 c[1] = tolower(c[1]);
1712 if (*s1)
1713 n++;
1714 while ((s1 = strpbrk(s1, c)) != NULL) {
1715 *s1++ = '\0';
1716 n++;
1718 return n;
1720 /* space split */
1721 while (*s) {
1722 s = skip_whitespace(s);
1723 if (!*s)
1724 break;
1725 n++;
1726 while (*s && !isspace(*s))
1727 *s1++ = *s++;
1728 *s1++ = '\0';
1730 return n;
1733 static void split_f0(void)
1735 /* static char *fstrings; */
1736 #define fstrings (G.split_f0__fstrings)
1738 int i, n;
1739 char *s;
1741 if (is_f0_split)
1742 return;
1744 is_f0_split = TRUE;
1745 free(fstrings);
1746 fsrealloc(0);
1747 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1748 fsrealloc(n);
1749 s = fstrings;
1750 for (i = 0; i < n; i++) {
1751 Fields[i].string = nextword(&s);
1752 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1755 /* set NF manually to avoid side effects */
1756 clrvar(intvar[NF]);
1757 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1758 intvar[NF]->number = nfields;
1759 #undef fstrings
1762 /* perform additional actions when some internal variables changed */
1763 static void handle_special(var *v)
1765 int n;
1766 char *b;
1767 const char *sep, *s;
1768 int sl, l, len, i, bsize;
1770 if (!(v->type & VF_SPECIAL))
1771 return;
1773 if (v == intvar[NF]) {
1774 n = (int)getvar_i(v);
1775 fsrealloc(n);
1777 /* recalculate $0 */
1778 sep = getvar_s(intvar[OFS]);
1779 sl = strlen(sep);
1780 b = NULL;
1781 len = 0;
1782 for (i = 0; i < n; i++) {
1783 s = getvar_s(&Fields[i]);
1784 l = strlen(s);
1785 if (b) {
1786 memcpy(b+len, sep, sl);
1787 len += sl;
1789 b = qrealloc(b, len+l+sl, &bsize);
1790 memcpy(b+len, s, l);
1791 len += l;
1793 if (b)
1794 b[len] = '\0';
1795 setvar_p(intvar[F0], b);
1796 is_f0_split = TRUE;
1798 } else if (v == intvar[F0]) {
1799 is_f0_split = FALSE;
1801 } else if (v == intvar[FS]) {
1802 mk_splitter(getvar_s(v), &fsplitter);
1804 } else if (v == intvar[RS]) {
1805 mk_splitter(getvar_s(v), &rsplitter);
1807 } else if (v == intvar[IGNORECASE]) {
1808 icase = istrue(v);
1810 } else { /* $n */
1811 n = getvar_i(intvar[NF]);
1812 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1813 /* right here v is invalid. Just to note... */
1817 /* step through func/builtin/etc arguments */
1818 static node *nextarg(node **pn)
1820 node *n;
1822 n = *pn;
1823 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1824 *pn = n->r.n;
1825 n = n->l.n;
1826 } else {
1827 *pn = NULL;
1829 return n;
1832 static void hashwalk_init(var *v, xhash *array)
1834 hash_item *hi;
1835 unsigned i;
1836 walker_list *w;
1837 walker_list *prev_walker;
1839 if (v->type & VF_WALK) {
1840 prev_walker = v->x.walker;
1841 } else {
1842 v->type |= VF_WALK;
1843 prev_walker = NULL;
1845 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1847 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1848 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1849 w->cur = w->end = w->wbuf;
1850 w->prev = prev_walker;
1851 for (i = 0; i < array->csize; i++) {
1852 hi = array->items[i];
1853 while (hi) {
1854 strcpy(w->end, hi->name);
1855 nextword(&w->end);
1856 hi = hi->next;
1861 static int hashwalk_next(var *v)
1863 walker_list *w = v->x.walker;
1865 if (w->cur >= w->end) {
1866 walker_list *prev_walker = w->prev;
1868 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1869 free(w);
1870 v->x.walker = prev_walker;
1871 return FALSE;
1874 setvar_s(v, nextword(&w->cur));
1875 return TRUE;
1878 /* evaluate node, return 1 when result is true, 0 otherwise */
1879 static int ptest(node *pattern)
1881 /* ptest__v is "static": to save stack space? */
1882 return istrue(evaluate(pattern, &G.ptest__v));
1885 /* read next record from stream rsm into a variable v */
1886 static int awk_getline(rstream *rsm, var *v)
1888 char *b;
1889 regmatch_t pmatch[2];
1890 int size, a, p, pp = 0;
1891 int fd, so, eo, r, rp;
1892 char c, *m, *s;
1894 debug_printf_eval("entered %s()\n", __func__);
1896 /* we're using our own buffer since we need access to accumulating
1897 * characters
1899 fd = fileno(rsm->F);
1900 m = rsm->buffer;
1901 a = rsm->adv;
1902 p = rsm->pos;
1903 size = rsm->size;
1904 c = (char) rsplitter.n.info;
1905 rp = 0;
1907 if (!m)
1908 m = qrealloc(m, 256, &size);
1910 do {
1911 b = m + a;
1912 so = eo = p;
1913 r = 1;
1914 if (p > 0) {
1915 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1916 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1917 b, 1, pmatch, 0) == 0) {
1918 so = pmatch[0].rm_so;
1919 eo = pmatch[0].rm_eo;
1920 if (b[eo] != '\0')
1921 break;
1923 } else if (c != '\0') {
1924 s = strchr(b+pp, c);
1925 if (!s)
1926 s = memchr(b+pp, '\0', p - pp);
1927 if (s) {
1928 so = eo = s-b;
1929 eo++;
1930 break;
1932 } else {
1933 while (b[rp] == '\n')
1934 rp++;
1935 s = strstr(b+rp, "\n\n");
1936 if (s) {
1937 so = eo = s-b;
1938 while (b[eo] == '\n')
1939 eo++;
1940 if (b[eo] != '\0')
1941 break;
1946 if (a > 0) {
1947 memmove(m, m+a, p+1);
1948 b = m;
1949 a = 0;
1952 m = qrealloc(m, a+p+128, &size);
1953 b = m + a;
1954 pp = p;
1955 p += safe_read(fd, b+p, size-p-1);
1956 if (p < pp) {
1957 p = 0;
1958 r = 0;
1959 setvar_i(intvar[ERRNO], errno);
1961 b[p] = '\0';
1963 } while (p > pp);
1965 if (p == 0) {
1966 r--;
1967 } else {
1968 c = b[so]; b[so] = '\0';
1969 setvar_s(v, b+rp);
1970 v->type |= VF_USER;
1971 b[so] = c;
1972 c = b[eo]; b[eo] = '\0';
1973 setvar_s(intvar[RT], b+so);
1974 b[eo] = c;
1977 rsm->buffer = m;
1978 rsm->adv = a + eo;
1979 rsm->pos = p - eo;
1980 rsm->size = size;
1982 debug_printf_eval("returning from %s(): %d\n", __func__, r);
1984 return r;
1987 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1989 int r = 0;
1990 char c;
1991 const char *s = format;
1993 if (int_as_int && n == (int)n) {
1994 r = snprintf(b, size, "%d", (int)n);
1995 } else {
1996 do { c = *s; } while (c && *++s);
1997 if (strchr("diouxX", c)) {
1998 r = snprintf(b, size, format, (int)n);
1999 } else if (strchr("eEfgG", c)) {
2000 r = snprintf(b, size, format, n);
2001 } else {
2002 syntax_error(EMSG_INV_FMT);
2005 return r;
2008 /* formatted output into an allocated buffer, return ptr to buffer */
2009 static char *awk_printf(node *n)
2011 char *b = NULL;
2012 char *fmt, *s, *f;
2013 const char *s1;
2014 int i, j, incr, bsize;
2015 char c, c1;
2016 var *v, *arg;
2018 v = nvalloc(1);
2019 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2021 i = 0;
2022 while (*f) {
2023 s = f;
2024 while (*f && (*f != '%' || *++f == '%'))
2025 f++;
2026 while (*f && !isalpha(*f)) {
2027 if (*f == '*')
2028 syntax_error("%*x formats are not supported");
2029 f++;
2032 incr = (f - s) + MAXVARFMT;
2033 b = qrealloc(b, incr + i, &bsize);
2034 c = *f;
2035 if (c != '\0')
2036 f++;
2037 c1 = *f;
2038 *f = '\0';
2039 arg = evaluate(nextarg(&n), v);
2041 j = i;
2042 if (c == 'c' || !c) {
2043 i += sprintf(b+i, s, is_numeric(arg) ?
2044 (char)getvar_i(arg) : *getvar_s(arg));
2045 } else if (c == 's') {
2046 s1 = getvar_s(arg);
2047 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2048 i += sprintf(b+i, s, s1);
2049 } else {
2050 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2052 *f = c1;
2054 /* if there was an error while sprintf, return value is negative */
2055 if (i < j)
2056 i = j;
2059 free(fmt);
2060 nvfree(v);
2061 b = xrealloc(b, i + 1);
2062 b[i] = '\0';
2063 return b;
2066 /* Common substitution routine.
2067 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2068 * store result into (dest), return number of substitutions.
2069 * If nm = 0, replace all matches.
2070 * If src or dst is NULL, use $0.
2071 * If subexp != 0, enable subexpression matching (\1-\9).
2073 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2075 char *resbuf;
2076 const char *sp;
2077 int match_no, residx, replen, resbufsize;
2078 int regexec_flags;
2079 regmatch_t pmatch[10];
2080 regex_t sreg, *regex;
2082 resbuf = NULL;
2083 residx = 0;
2084 match_no = 0;
2085 regexec_flags = 0;
2086 regex = as_regex(rn, &sreg);
2087 sp = getvar_s(src ? src : intvar[F0]);
2088 replen = strlen(repl);
2089 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2090 int so = pmatch[0].rm_so;
2091 int eo = pmatch[0].rm_eo;
2093 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2094 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2095 memcpy(resbuf + residx, sp, eo);
2096 residx += eo;
2097 if (++match_no >= nm) {
2098 const char *s;
2099 int nbs;
2101 /* replace */
2102 residx -= (eo - so);
2103 nbs = 0;
2104 for (s = repl; *s; s++) {
2105 char c = resbuf[residx++] = *s;
2106 if (c == '\\') {
2107 nbs++;
2108 continue;
2110 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2111 int j;
2112 residx -= ((nbs + 3) >> 1);
2113 j = 0;
2114 if (c != '&') {
2115 j = c - '0';
2116 nbs++;
2118 if (nbs % 2) {
2119 resbuf[residx++] = c;
2120 } else {
2121 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2122 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2123 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2124 residx += n;
2127 nbs = 0;
2131 regexec_flags = REG_NOTBOL;
2132 sp += eo;
2133 if (match_no == nm)
2134 break;
2135 if (eo == so) {
2136 /* Empty match (e.g. "b*" will match anywhere).
2137 * Advance by one char. */
2138 //BUG (bug 1333):
2139 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2140 //... and will erroneously match "b" even though it is NOT at the word start.
2141 //we need REG_NOTBOW but it does not exist...
2142 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2143 //it should be able to do it correctly.
2144 /* Subtle: this is safe only because
2145 * qrealloc allocated at least one extra byte */
2146 resbuf[residx] = *sp;
2147 if (*sp == '\0')
2148 goto ret;
2149 sp++;
2150 residx++;
2154 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2155 strcpy(resbuf + residx, sp);
2156 ret:
2157 //bb_error_msg("end sp:'%s'%p", sp,sp);
2158 setvar_p(dest ? dest : intvar[F0], resbuf);
2159 if (regex == &sreg)
2160 regfree(regex);
2161 return match_no;
/* Convert a date string "YYYY MM DD HH MM SS [DST]" to a timestamp.
 * Returns -1 if fewer than six numbers could be read, if the month is 0,
 * or if the year lies in 0..1899; otherwise returns mktime()'s result
 * (local time; the DST flag defaults to -1, "unknown").
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm then;
	int count;

	/*memset(&then, 0, sizeof(then)); - not needed */
	then.tm_isdst = -1; /* default is unknown */

	/* manpage of mktime says these fields are ints,
	 * so we can sscanf stuff directly into them
	 * (hence %d: the conversion must match the int fields) */
	count = sscanf(ds, "%d %d %d %d %d %d %d",
		&then.tm_year, &then.tm_mon, &then.tm_mday,
		&then.tm_hour, &then.tm_min, &then.tm_sec,
		&then.tm_isdst);

	if (count < 6)
		return -1;
	/* month 0 and years 0..1899 are rejected; negative values are
	 * deliberately let through and left for mktime() to normalize */
	if (then.tm_mon == 0)
		return -1;
	if (then.tm_year >= 0 && then.tm_year < 1900)
		return -1;

	then.tm_mon -= 1;
	then.tm_year -= 1900;

	return mktime(&then);
}
2192 static NOINLINE var *exec_builtin(node *op, var *res)
2194 #define tspl (G.exec_builtin__tspl)
2196 var *tv;
2197 node *an[4];
2198 var *av[4];
2199 const char *as[4];
2200 regmatch_t pmatch[2];
2201 regex_t sreg, *re;
2202 node *spl;
2203 uint32_t isr, info;
2204 int nargs;
2205 time_t tt;
2206 int i, l, ll, n;
2208 tv = nvalloc(4);
2209 isr = info = op->info;
2210 op = op->l.n;
2212 av[2] = av[3] = NULL;
2213 for (i = 0; i < 4 && op; i++) {
2214 an[i] = nextarg(&op);
2215 if (isr & 0x09000000)
2216 av[i] = evaluate(an[i], &tv[i]);
2217 if (isr & 0x08000000)
2218 as[i] = getvar_s(av[i]);
2219 isr >>= 1;
2222 nargs = i;
2223 if ((uint32_t)nargs < (info >> 30))
2224 syntax_error(EMSG_TOO_FEW_ARGS);
2226 info &= OPNMASK;
2227 switch (info) {
2229 case B_a2:
2230 if (ENABLE_FEATURE_AWK_LIBM)
2231 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2232 else
2233 syntax_error(EMSG_NO_MATH);
2234 break;
2236 case B_sp: {
2237 char *s, *s1;
2239 if (nargs > 2) {
2240 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2241 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2242 } else {
2243 spl = &fsplitter.n;
2246 n = awk_split(as[0], spl, &s);
2247 s1 = s;
2248 clear_array(iamarray(av[1]));
2249 for (i = 1; i <= n; i++)
2250 setari_u(av[1], i, nextword(&s));
2251 free(s1);
2252 setvar_i(res, n);
2253 break;
2256 case B_ss: {
2257 char *s;
2259 l = strlen(as[0]);
2260 i = getvar_i(av[1]) - 1;
2261 if (i > l)
2262 i = l;
2263 if (i < 0)
2264 i = 0;
2265 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2266 if (n < 0)
2267 n = 0;
2268 s = xstrndup(as[0]+i, n);
2269 setvar_p(res, s);
2270 break;
2273 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2274 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2275 case B_an:
2276 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2277 break;
2279 case B_co:
2280 setvar_i(res, ~getvar_i_int(av[0]));
2281 break;
2283 case B_ls:
2284 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2285 break;
2287 case B_or:
2288 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2289 break;
2291 case B_rs:
2292 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2293 break;
2295 case B_xo:
2296 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2297 break;
2299 case B_lo:
2300 case B_up: {
2301 char *s, *s1;
2302 s1 = s = xstrdup(as[0]);
2303 while (*s1) {
2304 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2305 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2306 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2307 s1++;
2309 setvar_p(res, s);
2310 break;
2313 case B_ix:
2314 n = 0;
2315 ll = strlen(as[1]);
2316 l = strlen(as[0]) - ll;
2317 if (ll > 0 && l >= 0) {
2318 if (!icase) {
2319 char *s = strstr(as[0], as[1]);
2320 if (s)
2321 n = (s - as[0]) + 1;
2322 } else {
2323 /* this piece of code is terribly slow and
2324 * really should be rewritten
2326 for (i = 0; i <= l; i++) {
2327 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2328 n = i+1;
2329 break;
2334 setvar_i(res, n);
2335 break;
2337 case B_ti:
2338 if (nargs > 1)
2339 tt = getvar_i(av[1]);
2340 else
2341 time(&tt);
2342 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2343 i = strftime(g_buf, MAXVARFMT,
2344 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2345 localtime(&tt));
2346 g_buf[i] = '\0';
2347 setvar_s(res, g_buf);
2348 break;
2350 case B_mt:
2351 setvar_i(res, do_mktime(as[0]));
2352 break;
2354 case B_ma:
2355 re = as_regex(an[1], &sreg);
2356 n = regexec(re, as[0], 1, pmatch, 0);
2357 if (n == 0) {
2358 pmatch[0].rm_so++;
2359 pmatch[0].rm_eo++;
2360 } else {
2361 pmatch[0].rm_so = 0;
2362 pmatch[0].rm_eo = -1;
2364 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2365 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2366 setvar_i(res, pmatch[0].rm_so);
2367 if (re == &sreg)
2368 regfree(re);
2369 break;
2371 case B_ge:
2372 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2373 break;
2375 case B_gs:
2376 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2377 break;
2379 case B_su:
2380 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2381 break;
2384 nvfree(tv);
2385 return res;
2386 #undef tspl
2390 * Evaluate node - the heart of the program. Supplied with subtree
2391 * and place where to store result. returns ptr to result.
2393 #define XC(n) ((n) >> 8)
2395 static var *evaluate(node *op, var *res)
2397 /* This procedure is recursive so we should count every byte */
2398 #define fnargs (G.evaluate__fnargs)
2399 /* seed is initialized to 1 */
2400 #define seed (G.evaluate__seed)
2401 #define sreg (G.evaluate__sreg)
2403 var *v1;
2405 if (!op)
2406 return setvar_s(res, NULL);
2408 debug_printf_eval("entered %s()\n", __func__);
2410 v1 = nvalloc(2);
2412 while (op) {
2413 struct {
2414 var *v;
2415 const char *s;
2416 } L = L; /* for compiler */
2417 struct {
2418 var *v;
2419 const char *s;
2420 } R = R;
2421 double L_d = L_d;
2422 uint32_t opinfo;
2423 int opn;
2424 node *op1;
2426 opinfo = op->info;
2427 opn = (opinfo & OPNMASK);
2428 g_lineno = op->lineno;
2429 op1 = op->l.n;
2430 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2432 /* execute inevitable things */
2433 if (opinfo & OF_RES1)
2434 L.v = evaluate(op1, v1);
2435 if (opinfo & OF_RES2)
2436 R.v = evaluate(op->r.n, v1+1);
2437 if (opinfo & OF_STR1) {
2438 L.s = getvar_s(L.v);
2439 debug_printf_eval("L.s:'%s'\n", L.s);
2441 if (opinfo & OF_STR2) {
2442 R.s = getvar_s(R.v);
2443 debug_printf_eval("R.s:'%s'\n", R.s);
2445 if (opinfo & OF_NUM1) {
2446 L_d = getvar_i(L.v);
2447 debug_printf_eval("L_d:%f\n", L_d);
2450 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2451 switch (XC(opinfo & OPCLSMASK)) {
2453 /* -- iterative node type -- */
2455 /* test pattern */
2456 case XC( OC_TEST ):
2457 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2458 /* it's range pattern */
2459 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2460 op->info |= OF_CHECKED;
2461 if (ptest(op1->r.n))
2462 op->info &= ~OF_CHECKED;
2463 op = op->a.n;
2464 } else {
2465 op = op->r.n;
2467 } else {
2468 op = ptest(op1) ? op->a.n : op->r.n;
2470 break;
2472 /* just evaluate an expression, also used as unconditional jump */
2473 case XC( OC_EXEC ):
2474 break;
2476 /* branch, used in if-else and various loops */
2477 case XC( OC_BR ):
2478 op = istrue(L.v) ? op->a.n : op->r.n;
2479 break;
2481 /* initialize for-in loop */
2482 case XC( OC_WALKINIT ):
2483 hashwalk_init(L.v, iamarray(R.v));
2484 break;
2486 /* get next array item */
2487 case XC( OC_WALKNEXT ):
2488 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2489 break;
2491 case XC( OC_PRINT ):
2492 case XC( OC_PRINTF ): {
2493 FILE *F = stdout;
2495 if (op->r.n) {
2496 rstream *rsm = newfile(R.s);
2497 if (!rsm->F) {
2498 if (opn == '|') {
2499 rsm->F = popen(R.s, "w");
2500 if (rsm->F == NULL)
2501 bb_perror_msg_and_die("popen");
2502 rsm->is_pipe = 1;
2503 } else {
2504 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2507 F = rsm->F;
2510 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2511 if (!op1) {
2512 fputs(getvar_s(intvar[F0]), F);
2513 } else {
2514 while (op1) {
2515 var *v = evaluate(nextarg(&op1), v1);
2516 if (v->type & VF_NUMBER) {
2517 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2518 getvar_i(v), TRUE);
2519 fputs(g_buf, F);
2520 } else {
2521 fputs(getvar_s(v), F);
2524 if (op1)
2525 fputs(getvar_s(intvar[OFS]), F);
2528 fputs(getvar_s(intvar[ORS]), F);
2530 } else { /* OC_PRINTF */
2531 char *s = awk_printf(op1);
2532 fputs(s, F);
2533 free(s);
2535 fflush(F);
2536 break;
2539 case XC( OC_DELETE ): {
2540 uint32_t info = op1->info & OPCLSMASK;
2541 var *v;
2543 if (info == OC_VAR) {
2544 v = op1->l.v;
2545 } else if (info == OC_FNARG) {
2546 v = &fnargs[op1->l.aidx];
2547 } else {
2548 syntax_error(EMSG_NOT_ARRAY);
2551 if (op1->r.n) {
2552 const char *s;
2553 clrvar(L.v);
2554 s = getvar_s(evaluate(op1->r.n, v1));
2555 hash_remove(iamarray(v), s);
2556 } else {
2557 clear_array(iamarray(v));
2559 break;
2562 case XC( OC_NEWSOURCE ):
2563 g_progname = op->l.new_progname;
2564 break;
2566 case XC( OC_RETURN ):
2567 copyvar(res, L.v);
2568 break;
2570 case XC( OC_NEXTFILE ):
2571 nextfile = TRUE;
2572 case XC( OC_NEXT ):
2573 nextrec = TRUE;
2574 case XC( OC_DONE ):
2575 clrvar(res);
2576 break;
2578 case XC( OC_EXIT ):
2579 awk_exit(L_d);
2581 /* -- recursive node type -- */
2583 case XC( OC_VAR ):
2584 L.v = op->l.v;
2585 if (L.v == intvar[NF])
2586 split_f0();
2587 goto v_cont;
2589 case XC( OC_FNARG ):
2590 L.v = &fnargs[op->l.aidx];
2591 v_cont:
2592 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2593 break;
2595 case XC( OC_IN ):
2596 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2597 break;
2599 case XC( OC_REGEXP ):
2600 op1 = op;
2601 L.s = getvar_s(intvar[F0]);
2602 goto re_cont;
2604 case XC( OC_MATCH ):
2605 op1 = op->r.n;
2606 re_cont:
2608 regex_t *re = as_regex(op1, &sreg);
2609 int i = regexec(re, L.s, 0, NULL, 0);
2610 if (re == &sreg)
2611 regfree(re);
2612 setvar_i(res, (i == 0) ^ (opn == '!'));
2614 break;
2616 case XC( OC_MOVE ):
2617 debug_printf_eval("MOVE\n");
2618 /* if source is a temporary string, jusk relink it to dest */
2619 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2620 //then L.v ends up being a string, which is wrong
2621 // if (R.v == v1+1 && R.v->string) {
2622 // res = setvar_p(L.v, R.v->string);
2623 // R.v->string = NULL;
2624 // } else {
2625 res = copyvar(L.v, R.v);
2626 // }
2627 break;
2629 case XC( OC_TERNARY ):
2630 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2631 syntax_error(EMSG_POSSIBLE_ERROR);
2632 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2633 break;
2635 case XC( OC_FUNC ): {
2636 var *vbeg, *v;
2637 const char *sv_progname;
2639 if (!op->r.f->body.first)
2640 syntax_error(EMSG_UNDEF_FUNC);
2642 vbeg = v = nvalloc(op->r.f->nargs + 1);
2643 while (op1) {
2644 var *arg = evaluate(nextarg(&op1), v1);
2645 copyvar(v, arg);
2646 v->type |= VF_CHILD;
2647 v->x.parent = arg;
2648 if (++v - vbeg >= op->r.f->nargs)
2649 break;
2652 v = fnargs;
2653 fnargs = vbeg;
2654 sv_progname = g_progname;
2656 res = evaluate(op->r.f->body.first, res);
2658 g_progname = sv_progname;
2659 nvfree(fnargs);
2660 fnargs = v;
2662 break;
2665 case XC( OC_GETLINE ):
2666 case XC( OC_PGETLINE ): {
2667 rstream *rsm;
2668 int i;
2670 if (op1) {
2671 rsm = newfile(L.s);
2672 if (!rsm->F) {
2673 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2674 rsm->F = popen(L.s, "r");
2675 rsm->is_pipe = TRUE;
2676 } else {
2677 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2680 } else {
2681 if (!iF)
2682 iF = next_input_file();
2683 rsm = iF;
2686 if (!rsm || !rsm->F) {
2687 setvar_i(intvar[ERRNO], errno);
2688 setvar_i(res, -1);
2689 break;
2692 if (!op->r.n)
2693 R.v = intvar[F0];
2695 i = awk_getline(rsm, R.v);
2696 if (i > 0 && !op1) {
2697 incvar(intvar[FNR]);
2698 incvar(intvar[NR]);
2700 setvar_i(res, i);
2701 break;
2704 /* simple builtins */
2705 case XC( OC_FBLTIN ): {
2706 double R_d = R_d; /* for compiler */
2708 switch (opn) {
2709 case F_in:
2710 R_d = (int)L_d;
2711 break;
2713 case F_rn:
2714 R_d = (double)rand() / (double)RAND_MAX;
2715 break;
2717 case F_co:
2718 if (ENABLE_FEATURE_AWK_LIBM) {
2719 R_d = cos(L_d);
2720 break;
2723 case F_ex:
2724 if (ENABLE_FEATURE_AWK_LIBM) {
2725 R_d = exp(L_d);
2726 break;
2729 case F_lg:
2730 if (ENABLE_FEATURE_AWK_LIBM) {
2731 R_d = log(L_d);
2732 break;
2735 case F_si:
2736 if (ENABLE_FEATURE_AWK_LIBM) {
2737 R_d = sin(L_d);
2738 break;
2741 case F_sq:
2742 if (ENABLE_FEATURE_AWK_LIBM) {
2743 R_d = sqrt(L_d);
2744 break;
2747 syntax_error(EMSG_NO_MATH);
2748 break;
2750 case F_sr:
2751 R_d = (double)seed;
2752 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2753 srand(seed);
2754 break;
2756 case F_ti:
2757 R_d = time(NULL);
2758 break;
2760 case F_le:
2761 if (!op1)
2762 L.s = getvar_s(intvar[F0]);
2763 R_d = strlen(L.s);
2764 break;
2766 case F_sy:
2767 fflush_all();
2768 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2769 ? (system(L.s) >> 8) : 0;
2770 break;
2772 case F_ff:
2773 if (!op1) {
2774 fflush(stdout);
2775 } else if (L.s && *L.s) {
2776 rstream *rsm = newfile(L.s);
2777 fflush(rsm->F);
2778 } else {
2779 fflush_all();
2781 break;
2783 case F_cl: {
2784 rstream *rsm;
2785 int err = 0;
2786 rsm = (rstream *)hash_search(fdhash, L.s);
2787 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2788 if (rsm) {
2789 debug_printf_eval("OC_FBLTIN F_cl "
2790 "rsm->is_pipe:%d, ->F:%p\n",
2791 rsm->is_pipe, rsm->F);
2792 /* Can be NULL if open failed. Example:
2793 * getline line <"doesnt_exist";
2794 * close("doesnt_exist"); <--- here rsm->F is NULL
2796 if (rsm->F)
2797 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2798 free(rsm->buffer);
2799 hash_remove(fdhash, L.s);
2801 if (err)
2802 setvar_i(intvar[ERRNO], errno);
2803 R_d = (double)err;
2804 break;
2806 } /* switch */
2807 setvar_i(res, R_d);
2808 break;
2811 case XC( OC_BUILTIN ):
2812 res = exec_builtin(op, res);
2813 break;
2815 case XC( OC_SPRINTF ):
2816 setvar_p(res, awk_printf(op1));
2817 break;
2819 case XC( OC_UNARY ): {
2820 double Ld, R_d;
2822 Ld = R_d = getvar_i(R.v);
2823 switch (opn) {
2824 case 'P':
2825 Ld = ++R_d;
2826 goto r_op_change;
2827 case 'p':
2828 R_d++;
2829 goto r_op_change;
2830 case 'M':
2831 Ld = --R_d;
2832 goto r_op_change;
2833 case 'm':
2834 R_d--;
2835 r_op_change:
2836 setvar_i(R.v, R_d);
2837 break;
2838 case '!':
2839 Ld = !istrue(R.v);
2840 break;
2841 case '-':
2842 Ld = -R_d;
2843 break;
2845 setvar_i(res, Ld);
2846 break;
2849 case XC( OC_FIELD ): {
2850 int i = (int)getvar_i(R.v);
2851 if (i == 0) {
2852 res = intvar[F0];
2853 } else {
2854 split_f0();
2855 if (i > nfields)
2856 fsrealloc(i);
2857 res = &Fields[i - 1];
2859 break;
2862 /* concatenation (" ") and index joining (",") */
2863 case XC( OC_CONCAT ):
2864 case XC( OC_COMMA ): {
2865 const char *sep = "";
2866 if ((opinfo & OPCLSMASK) == OC_COMMA)
2867 sep = getvar_s(intvar[SUBSEP]);
2868 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2869 break;
2872 case XC( OC_LAND ):
2873 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2874 break;
2876 case XC( OC_LOR ):
2877 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2878 break;
2880 case XC( OC_BINARY ):
2881 case XC( OC_REPLACE ): {
2882 double R_d = getvar_i(R.v);
2883 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2884 switch (opn) {
2885 case '+':
2886 L_d += R_d;
2887 break;
2888 case '-':
2889 L_d -= R_d;
2890 break;
2891 case '*':
2892 L_d *= R_d;
2893 break;
2894 case '/':
2895 if (R_d == 0)
2896 syntax_error(EMSG_DIV_BY_ZERO);
2897 L_d /= R_d;
2898 break;
2899 case '&':
2900 if (ENABLE_FEATURE_AWK_LIBM)
2901 L_d = pow(L_d, R_d);
2902 else
2903 syntax_error(EMSG_NO_MATH);
2904 break;
2905 case '%':
2906 if (R_d == 0)
2907 syntax_error(EMSG_DIV_BY_ZERO);
2908 L_d -= (int)(L_d / R_d) * R_d;
2909 break;
2911 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2912 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2913 break;
2916 case XC( OC_COMPARE ): {
2917 int i = i; /* for compiler */
2918 double Ld;
2920 if (is_numeric(L.v) && is_numeric(R.v)) {
2921 Ld = getvar_i(L.v) - getvar_i(R.v);
2922 } else {
2923 const char *l = getvar_s(L.v);
2924 const char *r = getvar_s(R.v);
2925 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2927 switch (opn & 0xfe) {
2928 case 0:
2929 i = (Ld > 0);
2930 break;
2931 case 2:
2932 i = (Ld >= 0);
2933 break;
2934 case 4:
2935 i = (Ld == 0);
2936 break;
2938 setvar_i(res, (i == 0) ^ (opn & 1));
2939 break;
2942 default:
2943 syntax_error(EMSG_POSSIBLE_ERROR);
2945 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2946 op = op->a.n;
2947 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2948 break;
2949 if (nextrec)
2950 break;
2951 } /* while (op) */
2953 nvfree(v1);
2954 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2955 return res;
2956 #undef fnargs
2957 #undef seed
2958 #undef sreg
2962 /* -------- main & co. -------- */
2964 static int awk_exit(int r)
2966 var tv;
2967 unsigned i;
2968 hash_item *hi;
2970 zero_out_var(&tv);
2972 if (!exiting) {
2973 exiting = TRUE;
2974 nextrec = FALSE;
2975 evaluate(endseq.first, &tv);
2978 /* waiting for children */
2979 for (i = 0; i < fdhash->csize; i++) {
2980 hi = fdhash->items[i];
2981 while (hi) {
2982 if (hi->data.rs.F && hi->data.rs.is_pipe)
2983 pclose(hi->data.rs.F);
2984 hi = hi->next;
2988 exit(r);
2991 /* if expr looks like "var=value", perform assignment and return 1,
2992 * otherwise return 0 */
2993 static int is_assignment(const char *expr)
2995 char *exprc, *val, *s, *s1;
2997 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2998 return FALSE;
3001 exprc = xstrdup(expr);
3002 val = exprc + (val - expr);
3003 *val++ = '\0';
3005 s = s1 = val;
3006 while ((*s1 = nextchar(&s)) != '\0')
3007 s1++;
3009 setvar_u(newvar(exprc), val);
3010 free(exprc);
3011 return TRUE;
3014 /* switch to next input file */
3015 static rstream *next_input_file(void)
3017 #define rsm (G.next_input_file__rsm)
3018 #define files_happen (G.next_input_file__files_happen)
3020 FILE *F;
3021 const char *fname, *ind;
3023 if (rsm.F)
3024 fclose(rsm.F);
3025 rsm.F = NULL;
3026 rsm.pos = rsm.adv = 0;
3028 for (;;) {
3029 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3030 if (files_happen)
3031 return NULL;
3032 fname = "-";
3033 F = stdin;
3034 break;
3036 ind = getvar_s(incvar(intvar[ARGIND]));
3037 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3038 if (fname && *fname && !is_assignment(fname)) {
3039 F = xfopen_stdin(fname);
3040 break;
3044 files_happen = TRUE;
3045 setvar_s(intvar[FILENAME], fname);
3046 rsm.F = F;
3047 return &rsm;
3048 #undef rsm
3049 #undef files_happen
3052 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3053 int awk_main(int argc, char **argv)
3055 unsigned opt;
3056 char *opt_F;
3057 llist_t *list_v = NULL;
3058 llist_t *list_f = NULL;
3059 int i, j;
3060 var *v;
3061 var tv;
3062 char **envp;
3063 char *vnames = (char *)vNames; /* cheat */
3064 char *vvalues = (char *)vValues;
3066 INIT_G();
3068 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3069 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3070 if (ENABLE_LOCALE_SUPPORT)
3071 setlocale(LC_NUMERIC, "C");
3073 zero_out_var(&tv);
3075 /* allocate global buffer */
3076 g_buf = xmalloc(MAXVARFMT + 1);
3078 vhash = hash_init();
3079 ahash = hash_init();
3080 fdhash = hash_init();
3081 fnhash = hash_init();
3083 /* initialize variables */
3084 for (i = 0; *vnames; i++) {
3085 intvar[i] = v = newvar(nextword(&vnames));
3086 if (*vvalues != '\377')
3087 setvar_s(v, nextword(&vvalues));
3088 else
3089 setvar_i(v, 0);
3091 if (*vnames == '*') {
3092 v->type |= VF_SPECIAL;
3093 vnames++;
3097 handle_special(intvar[FS]);
3098 handle_special(intvar[RS]);
3100 newfile("/dev/stdin")->F = stdin;
3101 newfile("/dev/stdout")->F = stdout;
3102 newfile("/dev/stderr")->F = stderr;
3104 /* Huh, people report that sometimes environ is NULL. Oh well. */
3105 if (environ) for (envp = environ; *envp; envp++) {
3106 /* environ is writable, thus we don't strdup it needlessly */
3107 char *s = *envp;
3108 char *s1 = strchr(s, '=');
3109 if (s1) {
3110 *s1 = '\0';
3111 /* Both findvar and setvar_u take const char*
3112 * as 2nd arg -> environment is not trashed */
3113 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3114 *s1 = '=';
3117 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3118 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3119 argv += optind;
3120 argc -= optind;
3121 if (opt & 0x1)
3122 setvar_s(intvar[FS], opt_F); // -F
3123 while (list_v) { /* -v */
3124 if (!is_assignment(llist_pop(&list_v)))
3125 bb_show_usage();
3127 if (list_f) { /* -f */
3128 do {
3129 char *s = NULL;
3130 FILE *from_file;
3132 g_progname = llist_pop(&list_f);
3133 from_file = xfopen_stdin(g_progname);
3134 /* one byte is reserved for some trick in next_token */
3135 for (i = j = 1; j > 0; i += j) {
3136 s = xrealloc(s, i + 4096);
3137 j = fread(s + i, 1, 4094, from_file);
3139 s[i] = '\0';
3140 fclose(from_file);
3141 parse_program(s + 1);
3142 free(s);
3143 } while (list_f);
3144 argc++;
3145 } else { // no -f: take program from 1st parameter
3146 if (!argc)
3147 bb_show_usage();
3148 g_progname = "cmd. line";
3149 parse_program(*argv++);
3151 if (opt & 0x8) // -W
3152 bb_error_msg("warning: option -W is ignored");
3154 /* fill in ARGV array */
3155 setvar_i(intvar[ARGC], argc);
3156 setari_u(intvar[ARGV], 0, "awk");
3157 i = 0;
3158 while (*argv)
3159 setari_u(intvar[ARGV], ++i, *argv++);
3161 evaluate(beginseq.first, &tv);
3162 if (!mainseq.first && !endseq.first)
3163 awk_exit(EXIT_SUCCESS);
3165 /* input file could already be opened in BEGIN block */
3166 if (!iF)
3167 iF = next_input_file();
3169 /* passing through input files */
3170 while (iF) {
3171 nextfile = FALSE;
3172 setvar_i(intvar[FNR], 0);
3174 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3175 nextrec = FALSE;
3176 incvar(intvar[NR]);
3177 incvar(intvar[FNR]);
3178 evaluate(mainseq.first, &tv);
3180 if (nextfile)
3181 break;
3184 if (i < 0)
3185 syntax_error(strerror(errno));
3187 iF = next_input_file();
3190 awk_exit(EXIT_SUCCESS);
3191 /*return 0;*/