busybox: update to 1.23.2
[tomato.git] / release / src / router / busybox / editors / awk.c
blobf487163af731e3b26d0cb7fca3601ff94200bd93
1 /* vi: set sw=4 ts=4: */
2 /*
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
10 //config:config AWK
11 //config: bool "awk"
12 //config: default y
13 //config: help
14 //config: Awk is used as a pattern scanning and processing language. This is
15 //config: the BusyBox implementation of that programming language.
16 //config:
17 //config:config FEATURE_AWK_LIBM
18 //config: bool "Enable math functions (requires libm)"
19 //config: default y
20 //config: depends on AWK
21 //config: help
22 //config: Enable math functions of the Awk programming language.
23 //config: NOTE: This will require libm to be present for linking.
24 //config:
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config: bool "Enable a few GNU extensions"
27 //config: default y
28 //config: depends on AWK
29 //config: help
30 //config: Enable a few features from gawk:
31 //config: * command line option -e AWK_PROGRAM
32 //config: * simultaneous use of -f and -e on the command line.
33 //config: This enables the use of awk library files.
34 //config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
40 //usage:#define awk_trivial_usage
41 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage: " -v VAR=VAL Set variable"
44 //usage: "\n -F SEP Use SEP as field separator"
45 //usage: "\n -f FILE Read program from FILE"
46 //usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage: "\n -e AWK_PROGRAM"
48 //usage: )
50 #include "libbb.h"
51 #include "xregex.h"
52 #include <math.h>
/* This is a NOEXEC applet. Be very careful! */


/* Debug tracing hooks, one per subsystem (array walker, evaluator, parser).
 * Each is defined here as a no-op that discards its arguments unevaluated.
 * If you comment out one of these below, it will be #defined later
 * to perform debug printfs to stderr: */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)  do {} while (0)
#define debug_printf_parse(...)  do {} while (0)

#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#endif
74 #define OPTSTR_AWK \
75 "F:v:f:" \
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
77 "W:"
78 #define OPTCOMPLSTR_AWK \
79 "v::f::" \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
81 enum {
82 OPTBIT_F, /* define field separator */
83 OPTBIT_v, /* define variable */
84 OPTBIT_f, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86 OPTBIT_W, /* -W ignored */
87 OPT_F = 1 << OPTBIT_F,
88 OPT_v = 1 << OPTBIT_v,
89 OPT_f = 1 << OPTBIT_f,
90 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
91 OPT_W = 1 << OPTBIT_W
/* MAXVARFMT: buffer size passed to fmt_num() when converting a number to
 * its string form. MINNVBLOCK: minimum number of vars per temporary block. */
#define MAXVARFMT       240
#define MINNVBLOCK      64

/* variable flags */
#define VF_NUMBER       0x0001	/* 1 = primary type is number */
#define VF_ARRAY        0x0002	/* 1 = it's an array */

#define VF_CACHED       0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK         0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000	/* 1 = var::string points to fstring buffer */
#define VF_CHILD        0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000	/* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
/* One node of the list attached to var.x.walker (array elements for
 * for..in). Nodes are chained through 'prev'.
 * NOTE(review): wbuf[1] is presumably over-allocated by the creator and
 * 'cur'/'end' point into it - confirm against the code that builds it. */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev;
	char wbuf[1];
} walker_list;
119 /* Variable */
120 typedef struct var_s {
121 unsigned type; /* flags */
122 double number;
123 char *string;
124 union {
125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
128 walker_list *walker; /* list of array elements (for..in) */
129 } x;
130 } var;
/* Node chain (pattern-action chain, BEGIN, END, function bodies):
 * first/last node plus the name of the program the chain came from. */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;
	const char *programname;
} chain;
139 /* Function */
140 typedef struct func_s {
141 unsigned nargs;
142 struct chain_s body;
143 } func;
145 /* I/O stream */
146 typedef struct rstream_s {
147 FILE *F;
148 char *buffer;
149 int adv;
150 int size;
151 int pos;
152 smallint is_pipe;
153 } rstream;
155 typedef struct hash_item_s {
156 union {
157 struct var_s v; /* variable/array hash */
158 struct rstream_s rs; /* redirect streams hash */
159 struct func_s f; /* functions hash */
160 } data;
161 struct hash_item_s *next; /* next in chain */
162 char name[1]; /* really it's longer */
163 } hash_item;
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
	unsigned nel;           /* num of elements */
	unsigned csize;         /* current hash size */
	unsigned nprime;        /* next hash size in PRIMES[] */
	unsigned glen;          /* summary length of item names */
	struct hash_item_s **items;
} xhash;
173 /* Tree node */
174 typedef struct node_s {
175 uint32_t info;
176 unsigned lineno;
177 union {
178 struct node_s *n;
179 var *v;
180 int aidx;
181 char *new_progname;
182 regex_t *re;
183 } l;
184 union {
185 struct node_s *n;
186 regex_t *ire;
187 func *f;
188 } r;
189 union {
190 struct node_s *n;
191 } a;
192 } node;
194 /* Block of temporary variables */
195 typedef struct nvblock_s {
196 int size;
197 var *pos;
198 struct nvblock_s *prev;
199 struct nvblock_s *next;
200 var nv[];
201 } nvblock;
203 typedef struct tsplitter_s {
204 node n;
205 regex_t re[2];
206 } tsplitter;
/* simple token classes */
/* Order and hex values are very important!!! See next_token():
 * it starts at class 1 and shifts left once per NTC marker in tokenlist,
 * so class N here must match the Nth group of tokenlist. */
#define TC_SEQSTART     1               /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)        /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
/* Operator flags, stored in bits 16..21 of an info word.
 * NOTE(review): judging by the names and by the priority comment below,
 * RESn/STRn/NUM1 ask for operand n to be resolved / taken as string /
 * taken as number - confirm against evaluate(). */
#define	OF_RES1     0x010000
#define	OF_RES2     0x020000
#define	OF_STR1     0x040000
#define	OF_STR2     0x080000
#define	OF_NUM1     0x100000
#define	OF_CHECKED  0x200000

/* combined operator flags */
#define	xx	0
#define	xV	OF_RES2
#define	xS	(OF_RES2 | OF_STR2)
#define	Vx	OF_RES1
#define	VV	(OF_RES1 | OF_RES2)
#define	Nx	(OF_RES1 | OF_NUM1)
#define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define	Sx	(OF_RES1 | OF_STR1)
#define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define	OPCLSMASK 0xFF00
#define	OPNMASK   0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
/* Place x in the top byte of an info word. The argument is parenthesized
 * and widened to uint32_t first: builtin descriptors such as 0x83 would
 * otherwise be shifted into the sign bit of an int, which is undefined. */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
/* Operation classes: the value lives in the OPCLSMASK byte of an info word.
 * NOTE(review): SHIFT_TIL_THIS / RECUR_FROM_THIS are thresholds within this
 * numbering (equal to OC_WALKINIT and OC_BINARY); how they are used is
 * decided by the evaluator, which is not visible here. */

#define	SHIFT_TIL_THIS	0x0600
#define	RECUR_FROM_THIS	0x1000

enum {
	OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

	OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
	OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
	OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
	OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
/* simple builtins: sub-codes OR-ed into OC_FBLTIN entries of tokeninfo[] */
enum {
	F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* builtins: sub-codes OR-ed into OC_BUILTIN entries of tokeninfo[] */
enum {
	B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
344 /* tokens and their corresponding info values */
346 #define NTC "\377" /* switch to next token class (tc<<1) */
347 #define NTCC '\377'
349 #define OC_B OC_BUILTIN
351 static const char tokenlist[] ALIGN1 =
352 "\1(" NTC
353 "\1)" NTC
354 "\1/" NTC /* REGEXP */
355 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
356 "\2++" "\2--" NTC /* UOPPOST */
357 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
358 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
359 "\2*=" "\2/=" "\2%=" "\2^="
360 "\1+" "\1-" "\3**=" "\2**"
361 "\1/" "\1%" "\1^" "\1*"
362 "\2!=" "\2>=" "\2<=" "\1>"
363 "\1<" "\2!~" "\1~" "\2&&"
364 "\2||" "\1?" "\1:" NTC
365 "\2in" NTC
366 "\1," NTC
367 "\1|" NTC
368 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
369 "\1]" NTC
370 "\1{" NTC
371 "\1}" NTC
372 "\1;" NTC
373 "\1\n" NTC
374 "\2if" "\2do" "\3for" "\5break" /* STATX */
375 "\10continue" "\6delete" "\5print"
376 "\6printf" "\4next" "\10nextfile"
377 "\6return" "\4exit" NTC
378 "\5while" NTC
379 "\4else" NTC
381 "\3and" "\5compl" "\6lshift" "\2or"
382 "\6rshift" "\3xor"
383 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
384 "\3cos" "\3exp" "\3int" "\3log"
385 "\4rand" "\3sin" "\4sqrt" "\5srand"
386 "\6gensub" "\4gsub" "\5index" "\6length"
387 "\5match" "\5split" "\7sprintf" "\3sub"
388 "\6substr" "\7systime" "\10strftime" "\6mktime"
389 "\7tolower" "\7toupper" NTC
390 "\7getline" NTC
391 "\4func" "\10function" NTC
392 "\5BEGIN" NTC
393 "\3END"
394 /* compiler adds trailing "\0" */
397 static const uint32_t tokeninfo[] = {
400 OC_REGEXP,
401 xS|'a', xS|'w', xS|'|',
402 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
403 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
404 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
408 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
409 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
410 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411 OC_IN|SV|P(49), /* in */
412 OC_COMMA|SS|P(80),
413 OC_PGETLINE|SV|P(37),
414 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
415 0, /* ] */
419 0, /* \n */
420 ST_IF, ST_DO, ST_FOR, OC_BREAK,
421 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
422 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
423 OC_RETURN|Vx, OC_EXIT|Nx,
424 ST_WHILE,
425 0, /* else */
427 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
434 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
436 OC_GETLINE|SV|P(0),
437 0, 0,
439 0 /* END */
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order matches the names in vNames[]. */
enum {
	CONVFMT,    OFMT,       FS,         OFS,
	ORS,        RS,         RT,         FILENAME,
	SUBSEP,     F0,         ARGIND,     ARGC,
	ARGV,       ERRNO,      FNR,        NR,
	NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};
452 static const char vNames[] ALIGN1 =
453 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
454 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
455 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
456 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
457 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
459 static const char vValues[] ALIGN1 =
460 "%.6g\0" "%.6g\0" " \0" " \0"
461 "\n\0" "\n\0" "\0" "\0"
462 "\034\0" "\0" "\377";
464 /* hash size may grow to these values */
465 #define FIRST_PRIME 61
466 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
469 /* Globals. Split in two parts so that first one is addressed
470 * with (mostly short) negative offsets.
471 * NB: it's unsafe to put members of type "double"
472 * into globals2 (gcc may fail to align them).
474 struct globals {
475 double t_double;
476 chain beginseq, mainseq, endseq;
477 chain *seq;
478 node *break_ptr, *continue_ptr;
479 rstream *iF;
480 xhash *vhash, *ahash, *fdhash, *fnhash;
481 const char *g_progname;
482 int g_lineno;
483 int nfields;
484 int maxfields; /* used in fsrealloc() only */
485 var *Fields;
486 nvblock *g_cb;
487 char *g_pos;
488 char *g_buf;
489 smallint icase;
490 smallint exiting;
491 smallint nextrec;
492 smallint nextfile;
493 smallint is_f0_split;
494 smallint t_rollback;
496 struct globals2 {
497 uint32_t t_info; /* often used */
498 uint32_t t_tclass;
499 char *t_string;
500 int t_lineno;
502 var *intvar[NUM_INTERNAL_VARS]; /* often used */
504 /* former statics from various functions */
505 char *split_f0__fstrings;
507 uint32_t next_token__save_tclass;
508 uint32_t next_token__save_info;
509 uint32_t next_token__ltclass;
510 smallint next_token__concat_inserted;
512 smallint next_input_file__files_happen;
513 rstream next_input_file__rsm;
515 var *evaluate__fnargs;
516 unsigned evaluate__seed;
517 regex_t evaluate__sreg;
519 var ptest__v;
521 tsplitter exec_builtin__tspl;
523 /* biggest and least used members go last */
524 tsplitter fsplitter, rsplitter;
/* Both global structs live in one allocation (see INIT_G): struct globals
 * sits immediately before the address ptr_to_globals is set to, and
 * struct globals2 starts at that address. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_rollback   (G1.t_rollback  )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate both global structs zero-filled in one block, point
 * ptr_to_globals just past the first one, and set the two members
 * whose initial value is not zero. */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
573 /* function prototypes */
574 static void handle_special(var *);
575 static node *parse_expr(uint32_t);
576 static void chain_group(void);
577 static var *evaluate(node *, var *);
578 static rstream *next_input_file(void);
579 static int fmt_num(char *, int, const char *, double, int);
580 static int awk_exit(int) NORETURN;
582 /* ---- error handling ---- */
584 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
595 static void zero_out_var(var *vp)
597 memset(vp, 0, sizeof(*vp));
600 static void syntax_error(const char *message) NORETURN;
601 static void syntax_error(const char *message)
603 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
/* ---- hash stuff ---- */

/* Hash a NUL-terminated name: for every character, idx = c + idx * 63
 * (written as shift-and-subtract), with unsigned wrap-around.
 * The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;
	return idx;
}
617 /* create new hash */
618 static xhash *hash_init(void)
620 xhash *newhash;
622 newhash = xzalloc(sizeof(*newhash));
623 newhash->csize = FIRST_PRIME;
624 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
626 return newhash;
629 /* find item in hash, return ptr to data, NULL if not found */
630 static void *hash_search(xhash *hash, const char *name)
632 hash_item *hi;
634 hi = hash->items[hashidx(name) % hash->csize];
635 while (hi) {
636 if (strcmp(hi->name, name) == 0)
637 return &hi->data;
638 hi = hi->next;
640 return NULL;
643 /* grow hash if it becomes too big */
644 static void hash_rebuild(xhash *hash)
646 unsigned newsize, i, idx;
647 hash_item **newitems, *hi, *thi;
649 if (hash->nprime == ARRAY_SIZE(PRIMES))
650 return;
652 newsize = PRIMES[hash->nprime++];
653 newitems = xzalloc(newsize * sizeof(newitems[0]));
655 for (i = 0; i < hash->csize; i++) {
656 hi = hash->items[i];
657 while (hi) {
658 thi = hi;
659 hi = thi->next;
660 idx = hashidx(thi->name) % newsize;
661 thi->next = newitems[idx];
662 newitems[idx] = thi;
666 free(hash->items);
667 hash->csize = newsize;
668 hash->items = newitems;
671 /* find item in hash, add it if necessary. Return ptr to data */
672 static void *hash_find(xhash *hash, const char *name)
674 hash_item *hi;
675 unsigned idx;
676 int l;
678 hi = hash_search(hash, name);
679 if (!hi) {
680 if (++hash->nel / hash->csize > 10)
681 hash_rebuild(hash);
683 l = strlen(name) + 1;
684 hi = xzalloc(sizeof(*hi) + l);
685 strcpy(hi->name, name);
687 idx = hashidx(name) % hash->csize;
688 hi->next = hash->items[idx];
689 hash->items[idx] = hi;
690 hash->glen += l;
692 return &hi->data;
/* Typed find-or-create wrappers around hash_find(): findvar() takes the
 * hash explicitly, the others use the global variable, stream and
 * function hashes. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
700 static void hash_remove(xhash *hash, const char *name)
702 hash_item *hi, **phi;
704 phi = &hash->items[hashidx(name) % hash->csize];
705 while (*phi) {
706 hi = *phi;
707 if (strcmp(hi->name, name) == 0) {
708 hash->glen -= (strlen(name) + 1);
709 hash->nel--;
710 *phi = hi->next;
711 free(hi);
712 break;
714 phi = &hi->next;
718 /* ------ some useful functions ------ */
720 static char *skip_spaces(char *p)
722 while (1) {
723 if (*p == '\\' && p[1] == '\n') {
724 p++;
725 t_lineno++;
726 } else if (*p != ' ' && *p != '\t') {
727 break;
729 p++;
731 return p;
/* returns old *s, advances *s past word and terminating NUL */
/* Lets the caller step through consecutive NUL-terminated strings packed
 * in one buffer. */
static char *nextword(char **s)
{
	char *p = *s;
	while (*(*s)++ != '\0')
		continue;
	return p;
}
/* Fetch one (possibly backslash-escaped) character from *s and advance *s
 * past it. Escapes are decoded by bb_process_escape_sequence(); when that
 * returns a backslash without consuming anything, the character after the
 * backslash is returned literally. */
static char nextchar(char **s)
{
	char c, *pps;

	c = *(*s)++;
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	/* Example awk statement:
	 * s = "abc\"def"
	 * we must treat \" as "
	 */
	if (c == '\\' && *s == pps) { /* unrecognized \z? */
		c = *(*s); /* yes, fetch z */
		if (c)
			(*s)++; /* advance unless z = NUL */
	}
	return c;
}
/* TODO: merge with strcpy_and_process_escape_sequences()?
 */
/* Decode the escape sequences of s1 in place using nextchar(). The write
 * position never passes the read position, so the result is never longer
 * than the input. */
static void unescape_string_in_place(char *s1)
{
	char *s = s1;
	while ((*s1 = nextchar(&s)) != '\0')
		s1++;
}
772 static ALWAYS_INLINE int isalnum_(int c)
774 return (isalnum(c) || c == '_');
777 static double my_strtod(char **pp)
779 char *cp = *pp;
780 if (ENABLE_DESKTOP && cp[0] == '0') {
781 /* Might be hex or octal integer: 0x123abc or 07777 */
782 char c = (cp[1] | 0x20);
783 if (c == 'x' || isdigit(cp[1])) {
784 unsigned long long ull = strtoull(cp, pp, 0);
785 if (c == 'x')
786 return ull;
787 c = **pp;
788 if (!isdigit(c) && c != '.')
789 return ull;
790 /* else: it may be a floating number. Examples:
791 * 009.123 (*pp points to '9')
792 * 000.123 (*pp points to '.')
793 * fall through to strtod.
797 return strtod(cp, pp);
800 /* -------- working with variables (set/get/copy/etc) -------- */
802 static xhash *iamarray(var *v)
804 var *a = v;
806 while (a->type & VF_CHILD)
807 a = a->x.parent;
809 if (!(a->type & VF_ARRAY)) {
810 a->type |= VF_ARRAY;
811 a->x.array = hash_init();
813 return a->x.array;
816 static void clear_array(xhash *array)
818 unsigned i;
819 hash_item *hi, *thi;
821 for (i = 0; i < array->csize; i++) {
822 hi = array->items[i];
823 while (hi) {
824 thi = hi;
825 hi = hi->next;
826 free(thi->data.v.string);
827 free(thi);
829 array->items[i] = NULL;
831 array->glen = array->nel = 0;
834 /* clear a variable */
835 static var *clrvar(var *v)
837 if (!(v->type & VF_FSTR))
838 free(v->string);
840 v->type &= VF_DONTTOUCH;
841 v->type |= VF_DIRTY;
842 v->string = NULL;
843 return v;
846 /* assign string value to variable */
847 static var *setvar_p(var *v, char *value)
849 clrvar(v);
850 v->string = value;
851 handle_special(v);
852 return v;
855 /* same as setvar_p but make a copy of string */
856 static var *setvar_s(var *v, const char *value)
858 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
861 /* same as setvar_s but sets USER flag */
862 static var *setvar_u(var *v, const char *value)
864 v = setvar_s(v, value);
865 v->type |= VF_USER;
866 return v;
869 /* set array element to user string */
870 static void setari_u(var *a, int idx, const char *s)
872 var *v;
874 v = findvar(iamarray(a), itoa(idx));
875 setvar_u(v, s);
878 /* assign numeric value to variable */
879 static var *setvar_i(var *v, double value)
881 clrvar(v);
882 v->type |= VF_NUMBER;
883 v->number = value;
884 handle_special(v);
885 return v;
888 static const char *getvar_s(var *v)
890 /* if v is numeric and has no cached string, convert it to string */
891 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
893 v->string = xstrdup(g_buf);
894 v->type |= VF_CACHED;
896 return (v->string == NULL) ? "" : v->string;
899 static double getvar_i(var *v)
901 char *s;
903 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
904 v->number = 0;
905 s = v->string;
906 if (s && *s) {
907 debug_printf_eval("getvar_i: '%s'->", s);
908 v->number = my_strtod(&s);
909 debug_printf_eval("%f (s:'%s')\n", v->number, s);
910 if (v->type & VF_USER) {
911 s = skip_spaces(s);
912 if (*s != '\0')
913 v->type &= ~VF_USER;
915 } else {
916 debug_printf_eval("getvar_i: '%s'->zero\n", s);
917 v->type &= ~VF_USER;
919 v->type |= VF_CACHED;
921 debug_printf_eval("getvar_i: %f\n", v->number);
922 return v->number;
925 /* Used for operands of bitwise ops */
926 static unsigned long getvar_i_int(var *v)
928 double d = getvar_i(v);
930 /* Casting doubles to longs is undefined for values outside
931 * of target type range. Try to widen it as much as possible */
932 if (d >= 0)
933 return (unsigned long)d;
934 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935 return - (long) (unsigned long) (-d);
938 static var *copyvar(var *dest, const var *src)
940 if (dest != src) {
941 clrvar(dest);
942 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944 dest->number = src->number;
945 if (src->string)
946 dest->string = xstrdup(src->string);
948 handle_special(dest);
949 return dest;
952 static var *incvar(var *v)
954 return setvar_i(v, getvar_i(v) + 1.0);
957 /* return true if v is number or numeric string */
958 static int is_numeric(var *v)
960 getvar_i(v);
961 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
964 /* return 1 when value of v corresponds to true, 0 otherwise */
965 static int istrue(var *v)
967 if (is_numeric(v))
968 return (v->number != 0);
969 return (v->string && v->string[0]);
972 /* temporary variables allocator. Last allocated should be first freed */
973 static var *nvalloc(int n)
975 nvblock *pb = NULL;
976 var *v, *r;
977 int size;
979 while (g_cb) {
980 pb = g_cb;
981 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
982 break;
983 g_cb = g_cb->next;
986 if (!g_cb) {
987 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
989 g_cb->size = size;
990 g_cb->pos = g_cb->nv;
991 g_cb->prev = pb;
992 /*g_cb->next = NULL; - xzalloc did it */
993 if (pb)
994 pb->next = g_cb;
997 v = r = g_cb->pos;
998 g_cb->pos += n;
1000 while (v < g_cb->pos) {
1001 v->type = 0;
1002 v->string = NULL;
1003 v++;
1006 return r;
1009 static void nvfree(var *v)
1011 var *p;
1013 if (v < g_cb->nv || v >= g_cb->pos)
1014 syntax_error(EMSG_INTERNAL_ERROR);
1016 for (p = v; p < g_cb->pos; p++) {
1017 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018 clear_array(iamarray(p));
1019 free(p->x.array->items);
1020 free(p->x.array);
1022 if (p->type & VF_WALK) {
1023 walker_list *n;
1024 walker_list *w = p->x.walker;
1025 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1026 p->x.walker = NULL;
1027 while (w) {
1028 n = w->prev;
1029 debug_printf_walker(" free(%p)\n", w);
1030 free(w);
1031 w = n;
1034 clrvar(p);
1037 g_cb->pos = v;
1038 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1039 g_cb = g_cb->prev;
1043 /* ------- awk program text parsing ------- */
1045 /* Parse next token pointed by global pos, place results into global ttt.
1046 * If token isn't expected, give away. Return token class
1048 static uint32_t next_token(uint32_t expected)
1050 #define concat_inserted (G.next_token__concat_inserted)
1051 #define save_tclass (G.next_token__save_tclass)
1052 #define save_info (G.next_token__save_info)
1053 /* Initialized to TC_OPTERM: */
1054 #define ltclass (G.next_token__ltclass)
1056 char *p, *s;
1057 const char *tl;
1058 uint32_t tc;
1059 const uint32_t *ti;
1061 if (t_rollback) {
1062 t_rollback = FALSE;
1064 } else if (concat_inserted) {
1065 concat_inserted = FALSE;
1066 t_tclass = save_tclass;
1067 t_info = save_info;
1069 } else {
1070 p = g_pos;
1071 readnext:
1072 p = skip_spaces(p);
1073 g_lineno = t_lineno;
1074 if (*p == '#')
1075 while (*p != '\n' && *p != '\0')
1076 p++;
1078 if (*p == '\n')
1079 t_lineno++;
1081 if (*p == '\0') {
1082 tc = TC_EOF;
1083 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1085 } else if (*p == '\"') {
1086 /* it's a string */
1087 t_string = s = ++p;
1088 while (*p != '\"') {
1089 char *pp;
1090 if (*p == '\0' || *p == '\n')
1091 syntax_error(EMSG_UNEXP_EOS);
1092 pp = p;
1093 *s++ = nextchar(&pp);
1094 p = pp;
1096 p++;
1097 *s = '\0';
1098 tc = TC_STRING;
1099 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1101 } else if ((expected & TC_REGEXP) && *p == '/') {
1102 /* it's regexp */
1103 t_string = s = ++p;
1104 while (*p != '/') {
1105 if (*p == '\0' || *p == '\n')
1106 syntax_error(EMSG_UNEXP_EOS);
1107 *s = *p++;
1108 if (*s++ == '\\') {
1109 char *pp = p;
1110 s[-1] = bb_process_escape_sequence((const char **)&pp);
1111 if (*p == '\\')
1112 *s++ = '\\';
1113 if (pp == p)
1114 *s++ = *p++;
1115 else
1116 p = pp;
1119 p++;
1120 *s = '\0';
1121 tc = TC_REGEXP;
1122 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1124 } else if (*p == '.' || isdigit(*p)) {
1125 /* it's a number */
1126 char *pp = p;
1127 t_double = my_strtod(&pp);
1128 p = pp;
1129 if (*p == '.')
1130 syntax_error(EMSG_UNEXP_TOKEN);
1131 tc = TC_NUMBER;
1132 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1134 } else {
1135 /* search for something known */
1136 tl = tokenlist;
1137 tc = 0x00000001;
1138 ti = tokeninfo;
1139 while (*tl) {
1140 int l = (unsigned char) *tl++;
1141 if (l == (unsigned char) NTCC) {
1142 tc <<= 1;
1143 continue;
1145 /* if token class is expected,
1146 * token matches,
1147 * and it's not a longer word,
1149 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150 && strncmp(p, tl, l) == 0
1151 && !((tc & TC_WORD) && isalnum_(p[l]))
1153 /* then this is what we are looking for */
1154 t_info = *ti;
1155 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156 p += l;
1157 goto token_found;
1159 ti++;
1160 tl += l;
1162 /* not a known token */
1164 /* is it a name? (var/array/function) */
1165 if (!isalnum_(*p))
1166 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167 /* yes */
1168 t_string = --p;
1169 while (isalnum_(*++p)) {
1170 p[-1] = *p;
1172 p[-1] = '\0';
1173 tc = TC_VARIABLE;
1174 /* also consume whitespace between functionname and bracket */
1175 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176 p = skip_spaces(p);
1177 if (*p == '(') {
1178 tc = TC_FUNCTION;
1179 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180 } else {
1181 if (*p == '[') {
1182 p++;
1183 tc = TC_ARRAY;
1184 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185 } else
1186 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1189 token_found:
1190 g_pos = p;
1192 /* skipping newlines in some cases */
1193 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194 goto readnext;
1196 /* insert concatenation operator when needed */
1197 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198 concat_inserted = TRUE;
1199 save_tclass = tc;
1200 save_info = t_info;
1201 tc = TC_BINOP;
1202 t_info = OC_CONCAT | SS | P(35);
1205 t_tclass = tc;
1207 ltclass = t_tclass;
1209 /* Are we ready for this? */
1210 if (!(ltclass & expected))
1211 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1214 return ltclass;
1215 #undef concat_inserted
1216 #undef save_tclass
1217 #undef save_info
1218 #undef ltclass
1221 static void rollback_token(void)
1223 t_rollback = TRUE;
1226 static node *new_node(uint32_t info)
1228 node *n;
1230 n = xzalloc(sizeof(node));
1231 n->info = info;
1232 n->lineno = g_lineno;
1233 return n;
1236 static void mk_re_node(const char *s, node *n, regex_t *re)
1238 n->info = OC_REGEXP;
1239 n->l.re = re;
1240 n->r.ire = re + 1;
1241 xregcomp(re, s, REG_EXTENDED);
1242 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1245 static node *condition(void)
1247 next_token(TC_SEQSTART);
1248 return parse_expr(TC_SEQTERM);
1251 /* parse expression terminated by given argument, return ptr
1252 * to built subtree. Terminator is eaten by parse_expr */
// iexp is a token-class mask: the main loop stops at the first token whose
// class intersects it.
// The return value is sn.r.n, the right child of the local sentinel node sn;
// it stays NULL when the terminator is the very first token (empty expression).
1253 static node *parse_expr(uint32_t iexp)
1255 node sn;
1256 node *cn = &sn;
1257 node *vn, *glptr;
1258 uint32_t tc, xtc;
1259 var *v;
1261 debug_printf_parse("%s(%x)\n", __func__, iexp);
// sn is a stack-allocated sentinel under which the tree is built; cn always
// points at the node added last. glptr remembers a pending getline node.
1263 sn.info = PRIMASK;
1264 sn.r.n = glptr = NULL;
// xtc is the set of token classes accepted next; it is recomputed after
// every token that is consumed.
1265 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1267 while (!((tc = next_token(xtc)) & iexp)) {
// A '<' compare token that follows a getline node is re-interpreted as
// input redirection and attached to that getline node.
1269 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270 /* input redirection (<) attached to glptr node */
1271 debug_printf_parse("%s: input redir\n", __func__);
1272 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1273 cn->a.n = glptr;
1274 xtc = TC_OPERAND | TC_UOPPRE;
1275 glptr = NULL;
1277 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279 /* for binary and postfix-unary operators, jump back over
1280 * previous operators with higher priority */
// The climb follows a.n, which every node gets set to the node it was
// attached to (see the "cn->a.n = ..." assignments in this function).
1281 vn = cn;
1282 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1285 vn = vn->a.n;
1287 if ((t_info & OPCLSMASK) == OC_TERNARY)
1288 t_info += P(6);
// The new operator node takes vn's place under vn's former parent;
// vn becomes its left (binary) or right (postfix) operand below.
1289 cn = vn->a.n->r.n = new_node(t_info);
1290 cn->a.n = vn->a.n;
1291 if (tc & TC_BINOP) {
1292 cn->l.n = vn;
1293 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1295 /* it's a pipe */
1296 next_token(TC_GETLINE);
1297 /* give maximum priority to this pipe */
1298 cn->info &= ~PRIMASK;
1299 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1301 } else {
1302 cn->r.n = vn;
1303 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1305 vn->a.n = cn;
1307 } else {
1308 debug_printf_parse("%s: other\n", __func__);
1309 /* for operands and prefix-unary operators, attach them
1310 * to last node */
1311 vn = cn;
1312 cn = vn->r.n = new_node(t_info);
1313 cn->a.n = vn;
1314 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315 if (tc & (TC_OPERAND | TC_REGEXP)) {
1316 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318 /* one should be very careful with switch on tclass -
1319 * only simple tclasses should be used! */
1320 switch (tc) {
1321 case TC_VARIABLE:
1322 case TC_ARRAY:
1323 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1324 cn->info = OC_VAR;
// A name found in ahash becomes an OC_FNARG node carrying the index
// stored there; any other name is resolved through newvar().
1325 v = hash_search(ahash, t_string);
1326 if (v != NULL) {
1327 cn->info = OC_FNARG;
1328 cn->l.aidx = v->x.aidx;
1329 } else {
1330 cn->l.v = newvar(t_string);
1332 if (tc & TC_ARRAY) {
1333 cn->info |= xS;
1334 cn->r.n = parse_expr(TC_ARRTERM);
1336 break;
1338 case TC_NUMBER:
1339 case TC_STRING:
1340 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
// Constants are stored in a freshly allocated var attached to the node.
1341 cn->info = OC_VAR;
1342 v = cn->l.v = xzalloc(sizeof(var));
1343 if (tc & TC_NUMBER)
1344 setvar_i(v, t_double);
1345 else
1346 setvar_s(v, t_string);
1347 break;
1349 case TC_REGEXP:
1350 debug_printf_parse("%s: TC_REGEXP\n", __func__);
// Two regex_t objects are allocated because mk_re_node() fills re and re + 1.
1351 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1352 break;
1354 case TC_FUNCTION:
1355 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1356 cn->info = OC_FUNC;
1357 cn->r.f = newfunc(t_string);
1358 cn->l.n = condition();
1359 break;
1361 case TC_SEQSTART:
1362 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
// The node created above is replaced by the parenthesized subtree.
1363 cn = vn->r.n = parse_expr(TC_SEQTERM);
1364 if (!cn)
1365 syntax_error("Empty sequence");
1366 cn->a.n = vn;
1367 break;
1369 case TC_GETLINE:
1370 debug_printf_parse("%s: TC_GETLINE\n", __func__);
// Remember the node so that a following '<' can be attached to it.
1371 glptr = cn;
1372 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1373 break;
1375 case TC_BUILTIN:
1376 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377 cn->l.n = condition();
1378 break;
1384 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1385 return sn.r.n;
1388 /* add node to chain. Return ptr to alloc'd node */
1389 static node *chain_node(uint32_t info)
1391 node *n;
1393 if (!seq->first)
1394 seq->first = seq->last = new_node(0);
1396 if (seq->programname != g_progname) {
1397 seq->programname = g_progname;
1398 n = chain_node(OC_NEWSOURCE);
1399 n->l.new_progname = xstrdup(g_progname);
1402 n = seq->last;
1403 n->info = info;
1404 seq->last = n->a.n = new_node(OC_DONE);
1406 return n;
1409 static void chain_expr(uint32_t info)
1411 node *n;
1413 n = chain_node(info);
1414 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415 if (t_tclass & TC_GRPTERM)
1416 rollback_token();
1419 static node *chain_loop(node *nn)
1421 node *n, *n2, *save_brk, *save_cont;
1423 save_brk = break_ptr;
1424 save_cont = continue_ptr;
1426 n = chain_node(OC_BR | Vx);
1427 continue_ptr = new_node(OC_EXEC);
1428 break_ptr = new_node(OC_EXEC);
1429 chain_group();
1430 n2 = chain_node(OC_EXEC | Vx);
1431 n2->l.n = nn;
1432 n2->a.n = n;
1433 continue_ptr->a.n = n2;
1434 break_ptr->a.n = n->r.n = seq->last;
1436 continue_ptr = save_cont;
1437 break_ptr = save_brk;
1439 return n;
1442 /* parse group and attach it to chain */
1443 static void chain_group(void)
1445 uint32_t c;
1446 node *n, *n2, *n3;
1448 do {
1449 c = next_token(TC_GRPSEQ);
1450 } while (c & TC_NEWLINE);
1452 if (c & TC_GRPSTART) {
1453 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456 if (t_tclass & TC_NEWLINE)
1457 continue;
1458 rollback_token();
1459 chain_group();
1461 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1464 rollback_token();
1465 chain_expr(OC_EXEC | Vx);
1466 } else {
1467 /* TC_STATEMNT */
1468 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469 switch (t_info & OPCLSMASK) {
1470 case ST_IF:
1471 debug_printf_parse("%s: ST_IF\n", __func__);
1472 n = chain_node(OC_BR | Vx);
1473 n->l.n = condition();
1474 chain_group();
1475 n2 = chain_node(OC_EXEC);
1476 n->r.n = seq->last;
1477 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1478 chain_group();
1479 n2->a.n = seq->last;
1480 } else {
1481 rollback_token();
1483 break;
1485 case ST_WHILE:
1486 debug_printf_parse("%s: ST_WHILE\n", __func__);
1487 n2 = condition();
1488 n = chain_loop(NULL);
1489 n->l.n = n2;
1490 break;
1492 case ST_DO:
1493 debug_printf_parse("%s: ST_DO\n", __func__);
1494 n2 = chain_node(OC_EXEC);
1495 n = chain_loop(NULL);
1496 n2->a.n = n->a.n;
1497 next_token(TC_WHILE);
1498 n->l.n = condition();
1499 break;
1501 case ST_FOR:
1502 debug_printf_parse("%s: ST_FOR\n", __func__);
1503 next_token(TC_SEQSTART);
1504 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505 if (t_tclass & TC_SEQTERM) { /* for-in */
1506 if ((n2->info & OPCLSMASK) != OC_IN)
1507 syntax_error(EMSG_UNEXP_TOKEN);
1508 n = chain_node(OC_WALKINIT | VV);
1509 n->l.n = n2->l.n;
1510 n->r.n = n2->r.n;
1511 n = chain_loop(NULL);
1512 n->info = OC_WALKNEXT | Vx;
1513 n->l.n = n2->l.n;
1514 } else { /* for (;;) */
1515 n = chain_node(OC_EXEC | Vx);
1516 n->l.n = n2;
1517 n2 = parse_expr(TC_SEMICOL);
1518 n3 = parse_expr(TC_SEQTERM);
1519 n = chain_loop(n3);
1520 n->l.n = n2;
1521 if (!n2)
1522 n->info = OC_EXEC;
1524 break;
1526 case OC_PRINT:
1527 case OC_PRINTF:
1528 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529 n = chain_node(t_info);
1530 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531 if (t_tclass & TC_OUTRDR) {
1532 n->info |= t_info;
1533 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1535 if (t_tclass & TC_GRPTERM)
1536 rollback_token();
1537 break;
1539 case OC_BREAK:
1540 debug_printf_parse("%s: OC_BREAK\n", __func__);
1541 n = chain_node(OC_EXEC);
1542 n->a.n = break_ptr;
1543 chain_expr(t_info);
1544 break;
1546 case OC_CONTINUE:
1547 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1548 n = chain_node(OC_EXEC);
1549 n->a.n = continue_ptr;
1550 chain_expr(t_info);
1551 break;
1553 /* delete, next, nextfile, return, exit */
1554 default:
1555 debug_printf_parse("%s: default\n", __func__);
1556 chain_expr(t_info);
1561 static void parse_program(char *p)
1563 uint32_t tclass;
1564 node *cn;
1565 func *f;
1566 var *v;
1568 g_pos = p;
1569 t_lineno = 1;
1570 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1571 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1573 if (tclass & TC_OPTERM) {
1574 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1575 continue;
1578 seq = &mainseq;
1579 if (tclass & TC_BEGIN) {
1580 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1581 seq = &beginseq;
1582 chain_group();
1584 } else if (tclass & TC_END) {
1585 debug_printf_parse("%s: TC_END\n", __func__);
1586 seq = &endseq;
1587 chain_group();
1589 } else if (tclass & TC_FUNCDECL) {
1590 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1591 next_token(TC_FUNCTION);
1592 g_pos++;
1593 f = newfunc(t_string);
1594 f->body.first = NULL;
1595 f->nargs = 0;
1596 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1597 v = findvar(ahash, t_string);
1598 v->x.aidx = f->nargs++;
1600 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1601 break;
1603 seq = &f->body;
1604 chain_group();
1605 clear_array(ahash);
1607 } else if (tclass & TC_OPSEQ) {
1608 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1609 rollback_token();
1610 cn = chain_node(OC_TEST);
1611 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1612 if (t_tclass & TC_GRPSTART) {
1613 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1614 rollback_token();
1615 chain_group();
1616 } else {
1617 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1618 chain_node(OC_PRINT);
1620 cn->r.n = mainseq.last;
1622 } else /* if (tclass & TC_GRPSTART) */ {
1623 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1624 rollback_token();
1625 chain_group();
1628 debug_printf_parse("%s: TC_EOF\n", __func__);
1632 /* -------- program execution part -------- */
1634 static node *mk_splitter(const char *s, tsplitter *spl)
1636 regex_t *re, *ire;
1637 node *n;
1639 re = &spl->re[0];
1640 ire = &spl->re[1];
1641 n = &spl->n;
1642 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1643 regfree(re);
1644 regfree(ire); // TODO: nuke ire, use re+1?
1646 if (s[0] && s[1]) { /* strlen(s) > 1 */
1647 mk_re_node(s, n, re);
1648 } else {
1649 n->info = (uint32_t) s[0];
1652 return n;
1655 /* use node as a regular expression. Supplied with node ptr and regex_t
1656 * storage space. Return ptr to regex (if result points to preg, it should
1657 * be later regfree'd manually
1659 static regex_t *as_regex(node *op, regex_t *preg)
1661 int cflags;
1662 var *v;
1663 const char *s;
1665 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1666 return icase ? op->r.ire : op->l.re;
1668 v = nvalloc(1);
1669 s = getvar_s(evaluate(op, v));
1671 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1672 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1673 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1674 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1675 * (maybe gsub is not supposed to use REG_EXTENDED?).
1677 if (regcomp(preg, s, cflags)) {
1678 cflags &= ~REG_EXTENDED;
1679 xregcomp(preg, s, cflags);
1681 nvfree(v);
1682 return preg;
/* Gradually growing buffer.
 * b is reallocated when it is NULL or when n >= *size; the new capacity,
 * n + n/2 + 80, is stored in *size. Because reallocation also happens for
 * n == *size, the buffer always has at least one byte beyond n.
 * Returns the (possibly moved) buffer. */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1698 /* resize field storage space */
1699 static void fsrealloc(int size)
1701 int i;
1703 if (size >= maxfields) {
1704 i = maxfields;
1705 maxfields = size + 16;
1706 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1707 for (; i < maxfields; i++) {
1708 Fields[i].type = VF_SPECIAL;
1709 Fields[i].string = NULL;
1712 /* if size < nfields, clear extra field variables */
1713 for (i = size; i < nfields; i++) {
1714 clrvar(Fields + i);
1716 nfields = size;
// Split string s according to splitter node spl.
// A new buffer is allocated and returned through *slist; it receives the
// fields one after another, each terminated by NUL. The return value is the
// number of fields. The caller owns *slist.
1719 static int awk_split(const char *s, node *spl, char **slist)
1721 int l, n;
1722 char c[4];
1723 char *s1;
1724 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1726 /* in worst case, each char would be a separate field */
1727 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1728 strcpy(s1, s);
// c[0] and c[1] both hold the separator character taken from spl->info;
// c[2] is '\n' when RS is the empty string, otherwise c+2 is an empty string.
1730 c[0] = c[1] = (char)spl->info;
1731 c[2] = c[3] = '\0';
1732 if (*getvar_s(intvar[RS]) == '\0')
1733 c[2] = '\n';
1735 n = 0;
1736 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1737 if (!*s)
1738 return n; /* "": zero fields */
1739 n++; /* at least one field will be there */
1740 do {
1741 l = strcspn(s, c+2); /* len till next NUL or \n */
// A regex match counts as a separator only if it starts within the
// first l characters; otherwise the field ends at offset l.
1742 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1743 && pmatch[0].rm_so <= l
1745 l = pmatch[0].rm_so;
// A match ending at offset 0 (empty match at the start) is widened
// to one character so that the scan always advances.
1746 if (pmatch[0].rm_eo == 0) {
1747 l++;
1748 pmatch[0].rm_eo++;
1750 n++; /* we saw yet another delimiter */
1751 } else {
1752 pmatch[0].rm_eo = l;
1753 if (s[l])
1754 pmatch[0].rm_eo++;
1756 memcpy(s1, s, l);
1757 /* make sure we remove *all* of the separator chars */
1758 do {
1759 s1[l] = '\0';
1760 } while (++l < pmatch[0].rm_eo);
1761 nextword(&s1);
1762 s += pmatch[0].rm_eo;
1763 } while (*s);
1764 return n;
// Separator character NUL: every character becomes its own field.
1766 if (c[0] == '\0') { /* null split */
1767 while (*s) {
1768 *s1++ = *s++;
1769 *s1++ = '\0';
1770 n++;
1772 return n;
// Any single character other than space: the copy made by strcpy() above is
// cut in place at each occurrence. With icase set, both the upper-case and
// the lower-case form of the character are treated as separators.
1774 if (c[0] != ' ') { /* single-character split */
1775 if (icase) {
1776 c[0] = toupper(c[0]);
1777 c[1] = tolower(c[1]);
1779 if (*s1)
1780 n++;
1781 while ((s1 = strpbrk(s1, c)) != NULL) {
1782 *s1++ = '\0';
1783 n++;
1785 return n;
// Separator is a single space: fields are runs of non-whitespace characters;
// leading, trailing and repeated whitespace produce no empty fields.
1787 /* space split */
1788 while (*s) {
1789 s = skip_whitespace(s);
1790 if (!*s)
1791 break;
1792 n++;
1793 while (*s && !isspace(*s))
1794 *s1++ = *s++;
1795 *s1++ = '\0';
1797 return n;
1800 static void split_f0(void)
1802 /* static char *fstrings; */
1803 #define fstrings (G.split_f0__fstrings)
1805 int i, n;
1806 char *s;
1808 if (is_f0_split)
1809 return;
1811 is_f0_split = TRUE;
1812 free(fstrings);
1813 fsrealloc(0);
1814 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1815 fsrealloc(n);
1816 s = fstrings;
1817 for (i = 0; i < n; i++) {
1818 Fields[i].string = nextword(&s);
1819 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1822 /* set NF manually to avoid side effects */
1823 clrvar(intvar[NF]);
1824 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1825 intvar[NF]->number = nfields;
1826 #undef fstrings
1829 /* perform additional actions when some internal variables changed */
1830 static void handle_special(var *v)
1832 int n;
1833 char *b;
1834 const char *sep, *s;
1835 int sl, l, len, i, bsize;
1837 if (!(v->type & VF_SPECIAL))
1838 return;
1840 if (v == intvar[NF]) {
1841 n = (int)getvar_i(v);
1842 fsrealloc(n);
1844 /* recalculate $0 */
1845 sep = getvar_s(intvar[OFS]);
1846 sl = strlen(sep);
1847 b = NULL;
1848 len = 0;
1849 for (i = 0; i < n; i++) {
1850 s = getvar_s(&Fields[i]);
1851 l = strlen(s);
1852 if (b) {
1853 memcpy(b+len, sep, sl);
1854 len += sl;
1856 b = qrealloc(b, len+l+sl, &bsize);
1857 memcpy(b+len, s, l);
1858 len += l;
1860 if (b)
1861 b[len] = '\0';
1862 setvar_p(intvar[F0], b);
1863 is_f0_split = TRUE;
1865 } else if (v == intvar[F0]) {
1866 is_f0_split = FALSE;
1868 } else if (v == intvar[FS]) {
1870 * The POSIX-2008 standard says that changing FS should have no effect on the
1871 * current input line, but only on the next one. The language is:
1873 * > Before the first reference to a field in the record is evaluated, the record
1874 * > shall be split into fields, according to the rules in Regular Expressions,
1875 * > using the value of FS that was current at the time the record was read.
1877 * So, split up current line before assignment to FS:
1879 split_f0();
1881 mk_splitter(getvar_s(v), &fsplitter);
1883 } else if (v == intvar[RS]) {
1884 mk_splitter(getvar_s(v), &rsplitter);
1886 } else if (v == intvar[IGNORECASE]) {
1887 icase = istrue(v);
1889 } else { /* $n */
1890 n = getvar_i(intvar[NF]);
1891 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1892 /* right here v is invalid. Just to note... */
1896 /* step through func/builtin/etc arguments */
1897 static node *nextarg(node **pn)
1899 node *n;
1901 n = *pn;
1902 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1903 *pn = n->r.n;
1904 n = n->l.n;
1905 } else {
1906 *pn = NULL;
1908 return n;
1911 static void hashwalk_init(var *v, xhash *array)
1913 hash_item *hi;
1914 unsigned i;
1915 walker_list *w;
1916 walker_list *prev_walker;
1918 if (v->type & VF_WALK) {
1919 prev_walker = v->x.walker;
1920 } else {
1921 v->type |= VF_WALK;
1922 prev_walker = NULL;
1924 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1926 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1927 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1928 w->cur = w->end = w->wbuf;
1929 w->prev = prev_walker;
1930 for (i = 0; i < array->csize; i++) {
1931 hi = array->items[i];
1932 while (hi) {
1933 strcpy(w->end, hi->name);
1934 nextword(&w->end);
1935 hi = hi->next;
1940 static int hashwalk_next(var *v)
1942 walker_list *w = v->x.walker;
1944 if (w->cur >= w->end) {
1945 walker_list *prev_walker = w->prev;
1947 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1948 free(w);
1949 v->x.walker = prev_walker;
1950 return FALSE;
1953 setvar_s(v, nextword(&w->cur));
1954 return TRUE;
1957 /* evaluate node, return 1 when result is true, 0 otherwise */
1958 static int ptest(node *pattern)
1960 /* ptest__v is "static": to save stack space? */
1961 return istrue(evaluate(pattern, &G.ptest__v));
1964 /* read next record from stream rsm into a variable v */
// Return value (r): 1 when a record was stored in v; 0 when nothing is left
// to read (p == 0 without a read error); -1 when safe_read() failed, in
// which case ERRNO has been set. RT receives the text of the terminator.
1965 static int awk_getline(rstream *rsm, var *v)
1967 char *b;
1968 regmatch_t pmatch[2];
1969 int size, a, p, pp = 0;
1970 int fd, so, eo, r, rp;
1971 char c, *m, *s;
1973 debug_printf_eval("entered %s()\n", __func__);
1975 /* we're using our own buffer since we need access to accumulating
1976 * characters
// Local copies of the stream state: m = buffer, a = offset of the unread
// data inside m (rsm->adv), p = number of unread bytes (rsm->pos),
// size = capacity of m. c is the RS character taken from rsplitter.
1978 fd = fileno(rsm->F);
1979 m = rsm->buffer;
1980 a = rsm->adv;
1981 p = rsm->pos;
1982 size = rsm->size;
1983 c = (char) rsplitter.n.info;
1984 rp = 0;
1986 if (!m)
1987 m = qrealloc(m, 256, &size);
// Each pass looks for a record terminator in the buffered data b[0..p) and
// leaves the loop through "break" once one is found; so/eo are the start and
// end offsets of the terminator. Otherwise more data is read and the search
// is repeated, until a read adds nothing.
1989 do {
1990 b = m + a;
1991 so = eo = p;
1992 r = 1;
1993 if (p > 0) {
// RS is a regular expression. A match that reaches the end of the buffered
// data is not accepted yet: more input is read first.
1994 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1995 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1996 b, 1, pmatch, 0) == 0) {
1997 so = pmatch[0].rm_so;
1998 eo = pmatch[0].rm_eo;
1999 if (b[eo] != '\0')
2000 break;
// RS is a single character: search only the part added by the last read
// (from offset pp); an embedded NUL byte also ends the record.
2002 } else if (c != '\0') {
2003 s = strchr(b+pp, c);
2004 if (!s)
2005 s = memchr(b+pp, '\0', p - pp);
2006 if (s) {
2007 so = eo = s-b;
2008 eo++;
2009 break;
// RS character is NUL: leading newlines are skipped (rp) and the record
// ends at the first run of two or more newlines.
2011 } else {
2012 while (b[rp] == '\n')
2013 rp++;
2014 s = strstr(b+rp, "\n\n");
2015 if (s) {
2016 so = eo = s-b;
2017 while (b[eo] == '\n')
2018 eo++;
2019 if (b[eo] != '\0')
2020 break;
// No terminator yet: move the unread data to the start of the buffer,
// grow the buffer and append the next chunk from the file descriptor.
2025 if (a > 0) {
2026 memmove(m, m+a, p+1);
2027 b = m;
2028 a = 0;
2031 m = qrealloc(m, a+p+128, &size);
2032 b = m + a;
2033 pp = p;
2034 p += safe_read(fd, b+p, size-p-1);
// p < pp means safe_read() returned a negative value.
2035 if (p < pp) {
2036 p = 0;
2037 r = 0;
2038 setvar_i(intvar[ERRNO], errno);
2040 b[p] = '\0';
2042 } while (p > pp);
2044 if (p == 0) {
2045 r--;
2046 } else {
// b[so] and b[eo] are temporarily overwritten with NUL so that the record
// and the terminator can be copied as C strings, then restored.
2047 c = b[so]; b[so] = '\0';
2048 setvar_s(v, b+rp);
2049 v->type |= VF_USER;
2050 b[so] = c;
2051 c = b[eo]; b[eo] = '\0';
2052 setvar_s(intvar[RT], b+so);
2053 b[eo] = c;
// Store the state back; the consumed record and terminator are skipped.
2056 rsm->buffer = m;
2057 rsm->adv = a + eo;
2058 rsm->pos = p - eo;
2059 rsm->size = size;
2061 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2063 return r;
2066 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2068 int r = 0;
2069 char c;
2070 const char *s = format;
2072 if (int_as_int && n == (long long)n) {
2073 r = snprintf(b, size, "%lld", (long long)n);
2074 } else {
2075 do { c = *s; } while (c && *++s);
2076 if (strchr("diouxX", c)) {
2077 r = snprintf(b, size, format, (int)n);
2078 } else if (strchr("eEfgG", c)) {
2079 r = snprintf(b, size, format, n);
2080 } else {
2081 syntax_error(EMSG_INV_FMT);
2084 return r;
2087 /* formatted output into an allocated buffer, return ptr to buffer */
2088 static char *awk_printf(node *n)
2090 char *b = NULL;
2091 char *fmt, *s, *f;
2092 const char *s1;
2093 int i, j, incr, bsize;
2094 char c, c1;
2095 var *v, *arg;
2097 v = nvalloc(1);
2098 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2100 i = 0;
2101 while (*f) {
2102 s = f;
2103 while (*f && (*f != '%' || *++f == '%'))
2104 f++;
2105 while (*f && !isalpha(*f)) {
2106 if (*f == '*')
2107 syntax_error("%*x formats are not supported");
2108 f++;
2111 incr = (f - s) + MAXVARFMT;
2112 b = qrealloc(b, incr + i, &bsize);
2113 c = *f;
2114 if (c != '\0')
2115 f++;
2116 c1 = *f;
2117 *f = '\0';
2118 arg = evaluate(nextarg(&n), v);
2120 j = i;
2121 if (c == 'c' || !c) {
2122 i += sprintf(b+i, s, is_numeric(arg) ?
2123 (char)getvar_i(arg) : *getvar_s(arg));
2124 } else if (c == 's') {
2125 s1 = getvar_s(arg);
2126 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2127 i += sprintf(b+i, s, s1);
2128 } else {
2129 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2131 *f = c1;
2133 /* if there was an error while sprintf, return value is negative */
2134 if (i < j)
2135 i = j;
2138 free(fmt);
2139 nvfree(v);
2140 b = xrealloc(b, i + 1);
2141 b[i] = '\0';
2142 return b;
2145 /* Common substitution routine.
2146 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2147 * store result into (dest), return number of substitutions.
2148 * If nm = 0, replace all matches.
2149 * If src or dst is NULL, use $0.
2150 * If subexp != 0, enable subexpression matching (\1-\9).
// The value actually returned is match_no, the number of matches found
// before the loop ended.
2152 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2154 char *resbuf;
2155 const char *sp;
2156 int match_no, residx, replen, resbufsize;
2157 int regexec_flags;
2158 regmatch_t pmatch[10];
2159 regex_t sreg, *regex;
// resbuf/residx: result buffer and write position; sp: unprocessed rest of
// the source string.
2161 resbuf = NULL;
2162 residx = 0;
2163 match_no = 0;
2164 regexec_flags = 0;
2165 regex = as_regex(rn, &sreg);
2166 sp = getvar_s(src ? src : intvar[F0]);
2167 replen = strlen(repl);
2168 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2169 int so = pmatch[0].rm_so;
2170 int eo = pmatch[0].rm_eo;
2172 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match first; if this match is to be
// replaced, the matched part is dropped again below.
2173 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2174 memcpy(resbuf + residx, sp, eo);
2175 residx += eo;
2176 if (++match_no >= nm) {
2177 const char *s;
2178 int nbs;
2180 /* replace */
2181 residx -= (eo - so);
// nbs counts the backslashes seen immediately before the current character.
2182 nbs = 0;
2183 for (s = repl; *s; s++) {
2184 char c = resbuf[residx++] = *s;
2185 if (c == '\\') {
2186 nbs++;
2187 continue;
// '&' (or a digit when subexp is set): take back part of the backslashes
// just copied; an odd backslash count leaves the character literal, an even
// one substitutes the text of match j.
2189 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2190 int j;
2191 residx -= ((nbs + 3) >> 1);
2192 j = 0;
2193 if (c != '&') {
2194 j = c - '0';
2195 nbs++;
2197 if (nbs % 2) {
2198 resbuf[residx++] = c;
2199 } else {
2200 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2201 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2202 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2203 residx += n;
2206 nbs = 0;
// Later searches do not start at the beginning of the string.
2210 regexec_flags = REG_NOTBOL;
2211 sp += eo;
2212 if (match_no == nm)
2213 break;
2214 if (eo == so) {
2215 /* Empty match (e.g. "b*" will match anywhere).
2216 * Advance by one char. */
2217 //BUG (bug 1333):
2218 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2219 //... and will erroneously match "b" even though it is NOT at the word start.
2220 //we need REG_NOTBOW but it does not exist...
2221 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2222 //it should be able to do it correctly.
2223 /* Subtle: this is safe only because
2224 * qrealloc allocated at least one extra byte */
2225 resbuf[residx] = *sp;
2226 if (*sp == '\0')
2227 goto ret;
2228 sp++;
2229 residx++;
// Append the unmatched remainder of the source string.
2233 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2234 strcpy(resbuf + residx, sp);
2235 ret:
2236 //bb_error_msg("end sp:'%s'%p", sp,sp);
2237 setvar_p(dest ? dest : intvar[F0], resbuf);
// regex == &sreg means as_regex() compiled the pattern into our local
// storage, so it has to be freed here.
2238 if (regex == &sreg)
2239 regfree(regex);
2240 return match_no;
2243 static NOINLINE int do_mktime(const char *ds)
2245 struct tm then;
2246 int count;
2248 /*memset(&then, 0, sizeof(then)); - not needed */
2249 then.tm_isdst = -1; /* default is unknown */
2251 /* manpage of mktime says these fields are ints,
2252 * so we can sscanf stuff directly into them */
2253 count = sscanf(ds, "%u %u %u %u %u %u %d",
2254 &then.tm_year, &then.tm_mon, &then.tm_mday,
2255 &then.tm_hour, &then.tm_min, &then.tm_sec,
2256 &then.tm_isdst);
2258 if (count < 6
2259 || (unsigned)then.tm_mon < 1
2260 || (unsigned)then.tm_year < 1900
2262 return -1;
2265 then.tm_mon -= 1;
2266 then.tm_year -= 1900;
2268 return mktime(&then);
// Execute the builtin function described by node op and store its result in
// res, which is also the return value. op->l.n holds the argument list and
// the low bits of op->info (OPNMASK) select the builtin.
2271 static NOINLINE var *exec_builtin(node *op, var *res)
2273 #define tspl (G.exec_builtin__tspl)
2275 var *tv;
2276 node *an[4];
2277 var *av[4];
2278 const char *as[4];
2279 regmatch_t pmatch[2];
2280 regex_t sreg, *re;
2281 node *spl;
2282 uint32_t isr, info;
2283 int nargs;
2284 time_t tt;
2285 int i, l, ll, n;
2287 tv = nvalloc(4);
2288 isr = info = op->info;
2289 op = op->l.n;
// Collect up to four arguments: an[] = argument nodes, av[] = evaluated
// values, as[] = string values. Bits of the info word, shifted right by one
// for each argument, decide whether argument i is evaluated (mask
// 0x09000000) and whether its string value is fetched (mask 0x08000000).
2291 av[2] = av[3] = NULL;
2292 for (i = 0; i < 4 && op; i++) {
2293 an[i] = nextarg(&op);
2294 if (isr & 0x09000000)
2295 av[i] = evaluate(an[i], &tv[i]);
2296 if (isr & 0x08000000)
2297 as[i] = getvar_s(av[i]);
2298 isr >>= 1;
// The top two bits of info give the minimum number of arguments.
2301 nargs = i;
2302 if ((uint32_t)nargs < (info >> 30))
2303 syntax_error(EMSG_TOO_FEW_ARGS);
2305 info &= OPNMASK;
2306 switch (info) {
// atan2() of the first two arguments; needs the libm feature.
2308 case B_a2:
2309 if (ENABLE_FEATURE_AWK_LIBM)
2310 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2311 else
2312 syntax_error(EMSG_NO_MATH);
2313 break;
// Split as[0] into array av[1]. The separator is the third argument (a
// regexp node is used directly, anything else goes through mk_splitter());
// without a third argument the field splitter is used. Result: field count.
2315 case B_sp: {
2316 char *s, *s1;
2318 if (nargs > 2) {
2319 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2320 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2321 } else {
2322 spl = &fsplitter.n;
2325 n = awk_split(as[0], spl, &s);
2326 s1 = s;
2327 clear_array(iamarray(av[1]));
2328 for (i = 1; i <= n; i++)
2329 setari_u(av[1], i, nextword(&s));
2330 free(s1);
2331 setvar_i(res, n);
2332 break;
// Substring of as[0]: av[1] is the 1-based start (clamped to [0, length]
// after conversion to a 0-based index), av[2] the optional length
// (negative values are treated as 0).
2335 case B_ss: {
2336 char *s;
2338 l = strlen(as[0]);
2339 i = getvar_i(av[1]) - 1;
2340 if (i > l)
2341 i = l;
2342 if (i < 0)
2343 i = 0;
2344 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2345 if (n < 0)
2346 n = 0;
2347 s = xstrndup(as[0]+i, n);
2348 setvar_p(res, s);
2349 break;
2352 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2353 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2354 case B_an:
2355 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2356 break;
2358 case B_co:
2359 setvar_i(res, ~getvar_i_int(av[0]));
2360 break;
2362 case B_ls:
2363 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2364 break;
2366 case B_or:
2367 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2368 break;
2370 case B_rs:
2371 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2372 break;
2374 case B_xo:
2375 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2376 break;
// Case conversion of a copy of as[0]; only the ASCII letters a-z / A-Z are
// changed (bit 0x20 is set or cleared).
2378 case B_lo:
2379 case B_up: {
2380 char *s, *s1;
2381 s1 = s = xstrdup(as[0]);
2382 while (*s1) {
2383 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2384 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2385 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2386 s1++;
2388 setvar_p(res, s);
2389 break;
// 1-based position of as[1] inside as[0], or 0 when it does not occur or
// as[1] is empty. With icase set the comparison ignores case.
2392 case B_ix:
2393 n = 0;
2394 ll = strlen(as[1]);
2395 l = strlen(as[0]) - ll;
2396 if (ll > 0 && l >= 0) {
2397 if (!icase) {
2398 char *s = strstr(as[0], as[1]);
2399 if (s)
2400 n = (s - as[0]) + 1;
2401 } else {
2402 /* this piece of code is terribly slow and
2403 * really should be rewritten
2405 for (i = 0; i <= l; i++) {
2406 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2407 n = i+1;
2408 break;
2413 setvar_i(res, n);
2414 break;
// Format a time with strftime(): optional format in as[0], optional
// timestamp in av[1] (current time otherwise); output is limited to
// MAXVARFMT bytes in g_buf.
2416 case B_ti:
2417 if (nargs > 1)
2418 tt = getvar_i(av[1]);
2419 else
2420 time(&tt);
2421 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2422 i = strftime(g_buf, MAXVARFMT,
2423 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2424 localtime(&tt));
2425 g_buf[i] = '\0';
2426 setvar_s(res, g_buf);
2427 break;
2429 case B_mt:
2430 setvar_i(res, do_mktime(as[0]));
2431 break;
// Regex match of an[1] against as[0]. On success RSTART is the 1-based
// start and RLENGTH the match length; on failure RSTART is 0 and RLENGTH
// is -1. The result equals RSTART.
2433 case B_ma:
2434 re = as_regex(an[1], &sreg);
2435 n = regexec(re, as[0], 1, pmatch, 0);
2436 if (n == 0) {
2437 pmatch[0].rm_so++;
2438 pmatch[0].rm_eo++;
2439 } else {
2440 pmatch[0].rm_so = 0;
2441 pmatch[0].rm_eo = -1;
2443 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2444 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2445 setvar_i(res, pmatch[0].rm_so);
2446 if (re == &sreg)
2447 regfree(re);
2448 break;
// The three substitution builtins all go through awk_sub(): B_ge passes
// the match number from av[2], reads av[3], writes the new string to res
// and enables subexpressions; B_gs replaces all matches in av[2] (nm = 0),
// B_su only the first (nm = 1), both returning the awk_sub() count.
2450 case B_ge:
2451 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2452 break;
2454 case B_gs:
2455 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2456 break;
2458 case B_su:
2459 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2460 break;
2463 nvfree(tv);
2464 return res;
2465 #undef tspl
2469 * Evaluate node - the heart of the program. Supplied with subtree
2470 * and place where to store result. returns ptr to result.
2472 #define XC(n) ((n) >> 8)
2474 static var *evaluate(node *op, var *res)
2476 /* This procedure is recursive so we should count every byte */
2477 #define fnargs (G.evaluate__fnargs)
2478 /* seed is initialized to 1 */
2479 #define seed (G.evaluate__seed)
2480 #define sreg (G.evaluate__sreg)
2482 var *v1;
2484 if (!op)
2485 return setvar_s(res, NULL);
2487 debug_printf_eval("entered %s()\n", __func__);
2489 v1 = nvalloc(2);
2491 while (op) {
2492 struct {
2493 var *v;
2494 const char *s;
2495 } L = L; /* for compiler */
2496 struct {
2497 var *v;
2498 const char *s;
2499 } R = R;
2500 double L_d = L_d;
2501 uint32_t opinfo;
2502 int opn;
2503 node *op1;
2505 opinfo = op->info;
2506 opn = (opinfo & OPNMASK);
2507 g_lineno = op->lineno;
2508 op1 = op->l.n;
2509 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2511 /* execute inevitable things */
2512 if (opinfo & OF_RES1)
2513 L.v = evaluate(op1, v1);
2514 if (opinfo & OF_RES2)
2515 R.v = evaluate(op->r.n, v1+1);
2516 if (opinfo & OF_STR1) {
2517 L.s = getvar_s(L.v);
2518 debug_printf_eval("L.s:'%s'\n", L.s);
2520 if (opinfo & OF_STR2) {
2521 R.s = getvar_s(R.v);
2522 debug_printf_eval("R.s:'%s'\n", R.s);
2524 if (opinfo & OF_NUM1) {
2525 L_d = getvar_i(L.v);
2526 debug_printf_eval("L_d:%f\n", L_d);
2529 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2530 switch (XC(opinfo & OPCLSMASK)) {
2532 /* -- iterative node type -- */
2534 /* test pattern */
2535 case XC( OC_TEST ):
2536 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2537 /* it's range pattern */
2538 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2539 op->info |= OF_CHECKED;
2540 if (ptest(op1->r.n))
2541 op->info &= ~OF_CHECKED;
2542 op = op->a.n;
2543 } else {
2544 op = op->r.n;
2546 } else {
2547 op = ptest(op1) ? op->a.n : op->r.n;
2549 break;
2551 /* just evaluate an expression, also used as unconditional jump */
2552 case XC( OC_EXEC ):
2553 break;
2555 /* branch, used in if-else and various loops */
2556 case XC( OC_BR ):
2557 op = istrue(L.v) ? op->a.n : op->r.n;
2558 break;
2560 /* initialize for-in loop */
2561 case XC( OC_WALKINIT ):
2562 hashwalk_init(L.v, iamarray(R.v));
2563 break;
2565 /* get next array item */
2566 case XC( OC_WALKNEXT ):
2567 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2568 break;
2570 case XC( OC_PRINT ):
2571 case XC( OC_PRINTF ): {
2572 FILE *F = stdout;
2574 if (op->r.n) {
2575 rstream *rsm = newfile(R.s);
2576 if (!rsm->F) {
2577 if (opn == '|') {
2578 rsm->F = popen(R.s, "w");
2579 if (rsm->F == NULL)
2580 bb_perror_msg_and_die("popen");
2581 rsm->is_pipe = 1;
2582 } else {
2583 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2586 F = rsm->F;
2589 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2590 if (!op1) {
2591 fputs(getvar_s(intvar[F0]), F);
2592 } else {
2593 while (op1) {
2594 var *v = evaluate(nextarg(&op1), v1);
2595 if (v->type & VF_NUMBER) {
2596 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2597 getvar_i(v), TRUE);
2598 fputs(g_buf, F);
2599 } else {
2600 fputs(getvar_s(v), F);
2603 if (op1)
2604 fputs(getvar_s(intvar[OFS]), F);
2607 fputs(getvar_s(intvar[ORS]), F);
2609 } else { /* OC_PRINTF */
2610 char *s = awk_printf(op1);
2611 fputs(s, F);
2612 free(s);
2614 fflush(F);
2615 break;
2618 case XC( OC_DELETE ): {
2619 uint32_t info = op1->info & OPCLSMASK;
2620 var *v;
2622 if (info == OC_VAR) {
2623 v = op1->l.v;
2624 } else if (info == OC_FNARG) {
2625 v = &fnargs[op1->l.aidx];
2626 } else {
2627 syntax_error(EMSG_NOT_ARRAY);
2630 if (op1->r.n) {
2631 const char *s;
2632 clrvar(L.v);
2633 s = getvar_s(evaluate(op1->r.n, v1));
2634 hash_remove(iamarray(v), s);
2635 } else {
2636 clear_array(iamarray(v));
2638 break;
2641 case XC( OC_NEWSOURCE ):
2642 g_progname = op->l.new_progname;
2643 break;
2645 case XC( OC_RETURN ):
2646 copyvar(res, L.v);
2647 break;
2649 case XC( OC_NEXTFILE ):
2650 nextfile = TRUE;
2651 case XC( OC_NEXT ):
2652 nextrec = TRUE;
2653 case XC( OC_DONE ):
2654 clrvar(res);
2655 break;
2657 case XC( OC_EXIT ):
2658 awk_exit(L_d);
2660 /* -- recursive node type -- */
2662 case XC( OC_VAR ):
2663 L.v = op->l.v;
2664 if (L.v == intvar[NF])
2665 split_f0();
2666 goto v_cont;
2668 case XC( OC_FNARG ):
2669 L.v = &fnargs[op->l.aidx];
2670 v_cont:
2671 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2672 break;
2674 case XC( OC_IN ):
2675 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2676 break;
2678 case XC( OC_REGEXP ):
2679 op1 = op;
2680 L.s = getvar_s(intvar[F0]);
2681 goto re_cont;
2683 case XC( OC_MATCH ):
2684 op1 = op->r.n;
2685 re_cont:
2687 regex_t *re = as_regex(op1, &sreg);
2688 int i = regexec(re, L.s, 0, NULL, 0);
2689 if (re == &sreg)
2690 regfree(re);
2691 setvar_i(res, (i == 0) ^ (opn == '!'));
2693 break;
2695 case XC( OC_MOVE ):
2696 debug_printf_eval("MOVE\n");
2697 /* if source is a temporary string, jusk relink it to dest */
2698 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2699 //then L.v ends up being a string, which is wrong
2700 // if (R.v == v1+1 && R.v->string) {
2701 // res = setvar_p(L.v, R.v->string);
2702 // R.v->string = NULL;
2703 // } else {
2704 res = copyvar(L.v, R.v);
2705 // }
2706 break;
2708 case XC( OC_TERNARY ):
2709 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2710 syntax_error(EMSG_POSSIBLE_ERROR);
2711 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2712 break;
2714 case XC( OC_FUNC ): {
2715 var *vbeg, *v;
2716 const char *sv_progname;
2718 /* The body might be empty, still has to eval the args */
2719 if (!op->r.n->info && !op->r.f->body.first)
2720 syntax_error(EMSG_UNDEF_FUNC);
2722 vbeg = v = nvalloc(op->r.f->nargs + 1);
2723 while (op1) {
2724 var *arg = evaluate(nextarg(&op1), v1);
2725 copyvar(v, arg);
2726 v->type |= VF_CHILD;
2727 v->x.parent = arg;
2728 if (++v - vbeg >= op->r.f->nargs)
2729 break;
2732 v = fnargs;
2733 fnargs = vbeg;
2734 sv_progname = g_progname;
2736 res = evaluate(op->r.f->body.first, res);
2738 g_progname = sv_progname;
2739 nvfree(fnargs);
2740 fnargs = v;
2742 break;
2745 case XC( OC_GETLINE ):
2746 case XC( OC_PGETLINE ): {
2747 rstream *rsm;
2748 int i;
2750 if (op1) {
2751 rsm = newfile(L.s);
2752 if (!rsm->F) {
2753 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2754 rsm->F = popen(L.s, "r");
2755 rsm->is_pipe = TRUE;
2756 } else {
2757 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2760 } else {
2761 if (!iF)
2762 iF = next_input_file();
2763 rsm = iF;
2766 if (!rsm || !rsm->F) {
2767 setvar_i(intvar[ERRNO], errno);
2768 setvar_i(res, -1);
2769 break;
2772 if (!op->r.n)
2773 R.v = intvar[F0];
2775 i = awk_getline(rsm, R.v);
2776 if (i > 0 && !op1) {
2777 incvar(intvar[FNR]);
2778 incvar(intvar[NR]);
2780 setvar_i(res, i);
2781 break;
2784 /* simple builtins */
2785 case XC( OC_FBLTIN ): {
2786 double R_d = R_d; /* for compiler */
2788 switch (opn) {
2789 case F_in:
2790 R_d = (long long)L_d;
2791 break;
2793 case F_rn:
2794 R_d = (double)rand() / (double)RAND_MAX;
2795 break;
2797 case F_co:
2798 if (ENABLE_FEATURE_AWK_LIBM) {
2799 R_d = cos(L_d);
2800 break;
2803 case F_ex:
2804 if (ENABLE_FEATURE_AWK_LIBM) {
2805 R_d = exp(L_d);
2806 break;
2809 case F_lg:
2810 if (ENABLE_FEATURE_AWK_LIBM) {
2811 R_d = log(L_d);
2812 break;
2815 case F_si:
2816 if (ENABLE_FEATURE_AWK_LIBM) {
2817 R_d = sin(L_d);
2818 break;
2821 case F_sq:
2822 if (ENABLE_FEATURE_AWK_LIBM) {
2823 R_d = sqrt(L_d);
2824 break;
2827 syntax_error(EMSG_NO_MATH);
2828 break;
2830 case F_sr:
2831 R_d = (double)seed;
2832 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2833 srand(seed);
2834 break;
2836 case F_ti:
2837 R_d = time(NULL);
2838 break;
2840 case F_le:
2841 debug_printf_eval("length: L.s:'%s'\n", L.s);
2842 if (!op1) {
2843 L.s = getvar_s(intvar[F0]);
2844 debug_printf_eval("length: L.s='%s'\n", L.s);
2846 else if (L.v->type & VF_ARRAY) {
2847 R_d = L.v->x.array->nel;
2848 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2849 break;
2851 R_d = strlen(L.s);
2852 break;
2854 case F_sy:
2855 fflush_all();
2856 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2857 ? (system(L.s) >> 8) : 0;
2858 break;
2860 case F_ff:
2861 if (!op1) {
2862 fflush(stdout);
2863 } else if (L.s && *L.s) {
2864 rstream *rsm = newfile(L.s);
2865 fflush(rsm->F);
2866 } else {
2867 fflush_all();
2869 break;
2871 case F_cl: {
2872 rstream *rsm;
2873 int err = 0;
2874 rsm = (rstream *)hash_search(fdhash, L.s);
2875 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2876 if (rsm) {
2877 debug_printf_eval("OC_FBLTIN F_cl "
2878 "rsm->is_pipe:%d, ->F:%p\n",
2879 rsm->is_pipe, rsm->F);
2880 /* Can be NULL if open failed. Example:
2881 * getline line <"doesnt_exist";
2882 * close("doesnt_exist"); <--- here rsm->F is NULL
2884 if (rsm->F)
2885 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2886 free(rsm->buffer);
2887 hash_remove(fdhash, L.s);
2889 if (err)
2890 setvar_i(intvar[ERRNO], errno);
2891 R_d = (double)err;
2892 break;
2894 } /* switch */
2895 setvar_i(res, R_d);
2896 break;
2899 case XC( OC_BUILTIN ):
2900 res = exec_builtin(op, res);
2901 break;
2903 case XC( OC_SPRINTF ):
2904 setvar_p(res, awk_printf(op1));
2905 break;
2907 case XC( OC_UNARY ): {
2908 double Ld, R_d;
2910 Ld = R_d = getvar_i(R.v);
2911 switch (opn) {
2912 case 'P':
2913 Ld = ++R_d;
2914 goto r_op_change;
2915 case 'p':
2916 R_d++;
2917 goto r_op_change;
2918 case 'M':
2919 Ld = --R_d;
2920 goto r_op_change;
2921 case 'm':
2922 R_d--;
2923 r_op_change:
2924 setvar_i(R.v, R_d);
2925 break;
2926 case '!':
2927 Ld = !istrue(R.v);
2928 break;
2929 case '-':
2930 Ld = -R_d;
2931 break;
2933 setvar_i(res, Ld);
2934 break;
2937 case XC( OC_FIELD ): {
2938 int i = (int)getvar_i(R.v);
2939 if (i == 0) {
2940 res = intvar[F0];
2941 } else {
2942 split_f0();
2943 if (i > nfields)
2944 fsrealloc(i);
2945 res = &Fields[i - 1];
2947 break;
2950 /* concatenation (" ") and index joining (",") */
2951 case XC( OC_CONCAT ):
2952 case XC( OC_COMMA ): {
2953 const char *sep = "";
2954 if ((opinfo & OPCLSMASK) == OC_COMMA)
2955 sep = getvar_s(intvar[SUBSEP]);
2956 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2957 break;
2960 case XC( OC_LAND ):
2961 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2962 break;
2964 case XC( OC_LOR ):
2965 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2966 break;
2968 case XC( OC_BINARY ):
2969 case XC( OC_REPLACE ): {
2970 double R_d = getvar_i(R.v);
2971 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2972 switch (opn) {
2973 case '+':
2974 L_d += R_d;
2975 break;
2976 case '-':
2977 L_d -= R_d;
2978 break;
2979 case '*':
2980 L_d *= R_d;
2981 break;
2982 case '/':
2983 if (R_d == 0)
2984 syntax_error(EMSG_DIV_BY_ZERO);
2985 L_d /= R_d;
2986 break;
2987 case '&':
2988 if (ENABLE_FEATURE_AWK_LIBM)
2989 L_d = pow(L_d, R_d);
2990 else
2991 syntax_error(EMSG_NO_MATH);
2992 break;
2993 case '%':
2994 if (R_d == 0)
2995 syntax_error(EMSG_DIV_BY_ZERO);
2996 L_d -= (long long)(L_d / R_d) * R_d;
2997 break;
2999 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3000 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3001 break;
3004 case XC( OC_COMPARE ): {
3005 int i = i; /* for compiler */
3006 double Ld;
3008 if (is_numeric(L.v) && is_numeric(R.v)) {
3009 Ld = getvar_i(L.v) - getvar_i(R.v);
3010 } else {
3011 const char *l = getvar_s(L.v);
3012 const char *r = getvar_s(R.v);
3013 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3015 switch (opn & 0xfe) {
3016 case 0:
3017 i = (Ld > 0);
3018 break;
3019 case 2:
3020 i = (Ld >= 0);
3021 break;
3022 case 4:
3023 i = (Ld == 0);
3024 break;
3026 setvar_i(res, (i == 0) ^ (opn & 1));
3027 break;
3030 default:
3031 syntax_error(EMSG_POSSIBLE_ERROR);
3033 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3034 op = op->a.n;
3035 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3036 break;
3037 if (nextrec)
3038 break;
3039 } /* while (op) */
3041 nvfree(v1);
3042 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3043 return res;
3044 #undef fnargs
3045 #undef seed
3046 #undef sreg
3050 /* -------- main & co. -------- */
3052 static int awk_exit(int r)
3054 var tv;
3055 unsigned i;
3056 hash_item *hi;
3058 zero_out_var(&tv);
3060 if (!exiting) {
3061 exiting = TRUE;
3062 nextrec = FALSE;
3063 evaluate(endseq.first, &tv);
3066 /* waiting for children */
3067 for (i = 0; i < fdhash->csize; i++) {
3068 hi = fdhash->items[i];
3069 while (hi) {
3070 if (hi->data.rs.F && hi->data.rs.is_pipe)
3071 pclose(hi->data.rs.F);
3072 hi = hi->next;
3076 exit(r);
3079 /* if expr looks like "var=value", perform assignment and return 1,
3080 * otherwise return 0 */
3081 static int is_assignment(const char *expr)
3083 char *exprc, *val;
3085 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3086 return FALSE;
3089 exprc = xstrdup(expr);
3090 val = exprc + (val - expr);
3091 *val++ = '\0';
3093 unescape_string_in_place(val);
3094 setvar_u(newvar(exprc), val);
3095 free(exprc);
3096 return TRUE;
3099 /* switch to next input file */
3100 static rstream *next_input_file(void)
3102 #define rsm (G.next_input_file__rsm)
3103 #define files_happen (G.next_input_file__files_happen)
3105 FILE *F;
3106 const char *fname, *ind;
3108 if (rsm.F)
3109 fclose(rsm.F);
3110 rsm.F = NULL;
3111 rsm.pos = rsm.adv = 0;
3113 for (;;) {
3114 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3115 if (files_happen)
3116 return NULL;
3117 fname = "-";
3118 F = stdin;
3119 break;
3121 ind = getvar_s(incvar(intvar[ARGIND]));
3122 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3123 if (fname && *fname && !is_assignment(fname)) {
3124 F = xfopen_stdin(fname);
3125 break;
3129 files_happen = TRUE;
3130 setvar_s(intvar[FILENAME], fname);
3131 rsm.F = F;
3132 return &rsm;
3133 #undef rsm
3134 #undef files_happen
3137 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3138 int awk_main(int argc, char **argv)
3140 unsigned opt;
3141 char *opt_F;
3142 llist_t *list_v = NULL;
3143 llist_t *list_f = NULL;
3144 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3145 llist_t *list_e = NULL;
3146 #endif
3147 int i, j;
3148 var *v;
3149 var tv;
3150 char **envp;
3151 char *vnames = (char *)vNames; /* cheat */
3152 char *vvalues = (char *)vValues;
3154 INIT_G();
3156 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3157 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3158 if (ENABLE_LOCALE_SUPPORT)
3159 setlocale(LC_NUMERIC, "C");
3161 zero_out_var(&tv);
3163 /* allocate global buffer */
3164 g_buf = xmalloc(MAXVARFMT + 1);
3166 vhash = hash_init();
3167 ahash = hash_init();
3168 fdhash = hash_init();
3169 fnhash = hash_init();
3171 /* initialize variables */
3172 for (i = 0; *vnames; i++) {
3173 intvar[i] = v = newvar(nextword(&vnames));
3174 if (*vvalues != '\377')
3175 setvar_s(v, nextword(&vvalues));
3176 else
3177 setvar_i(v, 0);
3179 if (*vnames == '*') {
3180 v->type |= VF_SPECIAL;
3181 vnames++;
3185 handle_special(intvar[FS]);
3186 handle_special(intvar[RS]);
3188 newfile("/dev/stdin")->F = stdin;
3189 newfile("/dev/stdout")->F = stdout;
3190 newfile("/dev/stderr")->F = stderr;
3192 /* Huh, people report that sometimes environ is NULL. Oh well. */
3193 if (environ) for (envp = environ; *envp; envp++) {
3194 /* environ is writable, thus we don't strdup it needlessly */
3195 char *s = *envp;
3196 char *s1 = strchr(s, '=');
3197 if (s1) {
3198 *s1 = '\0';
3199 /* Both findvar and setvar_u take const char*
3200 * as 2nd arg -> environment is not trashed */
3201 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3202 *s1 = '=';
3205 opt_complementary = OPTCOMPLSTR_AWK;
3206 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3207 argv += optind;
3208 argc -= optind;
3209 if (opt & OPT_W)
3210 bb_error_msg("warning: option -W is ignored");
3211 if (opt & OPT_F) {
3212 unescape_string_in_place(opt_F);
3213 setvar_s(intvar[FS], opt_F);
3215 while (list_v) {
3216 if (!is_assignment(llist_pop(&list_v)))
3217 bb_show_usage();
3219 while (list_f) {
3220 char *s = NULL;
3221 FILE *from_file;
3223 g_progname = llist_pop(&list_f);
3224 from_file = xfopen_stdin(g_progname);
3225 /* one byte is reserved for some trick in next_token */
3226 for (i = j = 1; j > 0; i += j) {
3227 s = xrealloc(s, i + 4096);
3228 j = fread(s + i, 1, 4094, from_file);
3230 s[i] = '\0';
3231 fclose(from_file);
3232 parse_program(s + 1);
3233 free(s);
3235 g_progname = "cmd. line";
3236 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3237 while (list_e) {
3238 parse_program(llist_pop(&list_e));
3240 #endif
3241 if (!(opt & (OPT_f | OPT_e))) {
3242 if (!*argv)
3243 bb_show_usage();
3244 parse_program(*argv++);
3245 argc--;
3248 /* fill in ARGV array */
3249 setvar_i(intvar[ARGC], argc + 1);
3250 setari_u(intvar[ARGV], 0, "awk");
3251 i = 0;
3252 while (*argv)
3253 setari_u(intvar[ARGV], ++i, *argv++);
3255 evaluate(beginseq.first, &tv);
3256 if (!mainseq.first && !endseq.first)
3257 awk_exit(EXIT_SUCCESS);
3259 /* input file could already be opened in BEGIN block */
3260 if (!iF)
3261 iF = next_input_file();
3263 /* passing through input files */
3264 while (iF) {
3265 nextfile = FALSE;
3266 setvar_i(intvar[FNR], 0);
3268 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3269 nextrec = FALSE;
3270 incvar(intvar[NR]);
3271 incvar(intvar[FNR]);
3272 evaluate(mainseq.first, &tv);
3274 if (nextfile)
3275 break;
3278 if (i < 0)
3279 syntax_error(strerror(errno));
3281 iF = next_input_file();
3284 awk_exit(EXIT_SUCCESS);
3285 /*return 0;*/