1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 //config: Awk is used as a pattern scanning and processing language. This is
15 //config: the BusyBox implementation of that programming language.
17 //config:config FEATURE_AWK_LIBM
18 //config: bool "Enable math functions (requires libm)"
20 //config: depends on AWK
22 //config: Enable math functions of the Awk programming language.
23 //config: NOTE: This will require libm to be present for linking.
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config: bool "Enable a few GNU extensions"
28 //config: depends on AWK
30 //config: Enable a few features from gawk:
31 //config: * command line option -e AWK_PROGRAM
32 //config: * simultaneous use of -f and -e on the command line.
33 //config: This enables the use of awk library files.
34 //config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
40 //usage:#define awk_trivial_usage
41 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage: " -v VAR=VAL Set variable"
44 //usage: "\n -F SEP Use SEP as field separator"
45 //usage: "\n -f FILE Read program from FILE"
46 //usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage: "\n -e AWK_PROGRAM"
54 /* This is a NOEXEC applet. Be very careful! */
57 /* If you comment out one of these below, it will be #defined later
58 * to perform debug printfs to stderr: */
59 #define debug_printf_walker(...) do {} while (0)
60 #define debug_printf_eval(...) do {} while (0)
61 #define debug_printf_parse(...) do {} while (0)
63 #ifndef debug_printf_walker
64 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
66 #ifndef debug_printf_eval
67 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
69 #ifndef debug_printf_parse
70 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
78 #define OPTCOMPLSTR_AWK \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
82 OPTBIT_F
, /* define field separator */
83 OPTBIT_v
, /* define variable */
84 OPTBIT_f
, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e
,) /* -e AWK_PROGRAM */
86 OPTBIT_W
, /* -W ignored */
87 OPT_F
= 1 << OPTBIT_F
,
88 OPT_v
= 1 << OPTBIT_v
,
89 OPT_f
= 1 << OPTBIT_f
,
90 OPT_e
= IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e
)) + 0,
98 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
99 #define VF_ARRAY 0x0002 /* 1 = it's an array */
101 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
102 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
103 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
104 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
105 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
106 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
107 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
109 /* these flags are static, don't change them when value is changed */
110 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
112 typedef struct walker_list
{
115 struct walker_list
*prev
;
120 typedef struct var_s
{
121 unsigned type
; /* flags */
125 int aidx
; /* func arg idx (for compilation stage) */
126 struct xhash_s
*array
; /* array ptr */
127 struct var_s
*parent
; /* for func args, ptr to actual parameter */
128 walker_list
*walker
; /* list of array elements (for..in) */
132 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
133 typedef struct chain_s
{
134 struct node_s
*first
;
136 const char *programname
;
140 typedef struct func_s
{
146 typedef struct rstream_s
{
155 typedef struct hash_item_s
{
157 struct var_s v
; /* variable/array hash */
158 struct rstream_s rs
; /* redirect streams hash */
159 struct func_s f
; /* functions hash */
161 struct hash_item_s
*next
; /* next in chain */
162 char name
[1]; /* really it's longer */
165 typedef struct xhash_s
{
166 unsigned nel
; /* num of elements */
167 unsigned csize
; /* current hash size */
168 unsigned nprime
; /* next hash size in PRIMES[] */
169 unsigned glen
; /* summary length of item names */
170 struct hash_item_s
**items
;
174 typedef struct node_s
{
194 /* Block of temporary variables */
195 typedef struct nvblock_s
{
198 struct nvblock_s
*prev
;
199 struct nvblock_s
*next
;
203 typedef struct tsplitter_s
{
208 /* simple token classes */
209 /* Order and hex values are very important!!! See next_token() */
210 #define TC_SEQSTART (1 << 0) /* ( */
211 #define TC_SEQTERM (1 << 1) /* ) */
212 #define TC_REGEXP (1 << 2) /* /.../ */
213 #define TC_OUTRDR (1 << 3) /* | > >> */
214 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
215 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
216 #define TC_BINOPX (1 << 6) /* two-opnd operator */
217 #define TC_IN (1 << 7)
218 #define TC_COMMA (1 << 8)
219 #define TC_PIPE (1 << 9) /* input redirection pipe */
220 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
221 #define TC_ARRTERM (1 << 11) /* ] */
222 #define TC_GRPSTART (1 << 12) /* { */
223 #define TC_GRPTERM (1 << 13) /* } */
224 #define TC_SEMICOL (1 << 14)
225 #define TC_NEWLINE (1 << 15)
226 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
227 #define TC_WHILE (1 << 17)
228 #define TC_ELSE (1 << 18)
229 #define TC_BUILTIN (1 << 19)
230 /* This costs ~50 bytes of code.
231 * A separate class to support deprecated "length" form. If we don't need that
232 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
233 * can be merged with TC_BUILTIN:
235 #define TC_LENGTH (1 << 20)
236 #define TC_GETLINE (1 << 21)
237 #define TC_FUNCDECL (1 << 22) /* `function' `func' */
238 #define TC_BEGIN (1 << 23)
239 #define TC_END (1 << 24)
240 #define TC_EOF (1 << 25)
241 #define TC_VARIABLE (1 << 26)
242 #define TC_ARRAY (1 << 27)
243 #define TC_FUNCTION (1 << 28)
244 #define TC_STRING (1 << 29)
245 #define TC_NUMBER (1 << 30)
247 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
249 /* combined token classes */
250 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
251 //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
252 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
253 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
254 | TC_SEQSTART | TC_STRING | TC_NUMBER)
256 #define TC_STATEMNT (TC_STATX | TC_WHILE)
257 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
259 /* word tokens, cannot mean something else if not expected */
260 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
261 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
262 | TC_FUNCDECL | TC_BEGIN | TC_END)
264 /* discard newlines after these */
265 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
266 | TC_BINOP | TC_OPTERM)
268 /* what can expression begin with */
269 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
270 /* what can group begin with */
271 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
273 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
274 /* operator is inserted between them */
275 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
276 | TC_STRING | TC_NUMBER | TC_UOPPOST)
277 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
279 #define OF_RES1 0x010000
280 #define OF_RES2 0x020000
281 #define OF_STR1 0x040000
282 #define OF_STR2 0x080000
283 #define OF_NUM1 0x100000
284 #define OF_CHECKED 0x200000
286 /* combined operator flags */
289 #define xS (OF_RES2 | OF_STR2)
291 #define VV (OF_RES1 | OF_RES2)
292 #define Nx (OF_RES1 | OF_NUM1)
293 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
294 #define Sx (OF_RES1 | OF_STR1)
295 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
296 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
298 #define OPCLSMASK 0xFF00
299 #define OPNMASK 0x007F
301 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
302 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
303 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
308 #define P(x) (x << 24)
309 #define PRIMASK 0x7F000000
310 #define PRIMASK2 0x7E000000
312 /* Operation classes */
314 #define SHIFT_TIL_THIS 0x0600
315 #define RECUR_FROM_THIS 0x1000
318 OC_DELETE
= 0x0100, OC_EXEC
= 0x0200, OC_NEWSOURCE
= 0x0300,
319 OC_PRINT
= 0x0400, OC_PRINTF
= 0x0500, OC_WALKINIT
= 0x0600,
321 OC_BR
= 0x0700, OC_BREAK
= 0x0800, OC_CONTINUE
= 0x0900,
322 OC_EXIT
= 0x0a00, OC_NEXT
= 0x0b00, OC_NEXTFILE
= 0x0c00,
323 OC_TEST
= 0x0d00, OC_WALKNEXT
= 0x0e00,
325 OC_BINARY
= 0x1000, OC_BUILTIN
= 0x1100, OC_COLON
= 0x1200,
326 OC_COMMA
= 0x1300, OC_COMPARE
= 0x1400, OC_CONCAT
= 0x1500,
327 OC_FBLTIN
= 0x1600, OC_FIELD
= 0x1700, OC_FNARG
= 0x1800,
328 OC_FUNC
= 0x1900, OC_GETLINE
= 0x1a00, OC_IN
= 0x1b00,
329 OC_LAND
= 0x1c00, OC_LOR
= 0x1d00, OC_MATCH
= 0x1e00,
330 OC_MOVE
= 0x1f00, OC_PGETLINE
= 0x2000, OC_REGEXP
= 0x2100,
331 OC_REPLACE
= 0x2200, OC_RETURN
= 0x2300, OC_SPRINTF
= 0x2400,
332 OC_TERNARY
= 0x2500, OC_UNARY
= 0x2600, OC_VAR
= 0x2700,
335 ST_IF
= 0x3000, ST_DO
= 0x3100, ST_FOR
= 0x3200,
339 /* simple builtins */
341 F_in
, F_rn
, F_co
, F_ex
, F_lg
, F_si
, F_sq
, F_sr
,
342 F_ti
, F_le
, F_sy
, F_ff
, F_cl
347 B_a2
, B_ix
, B_ma
, B_sp
, B_ss
, B_ti
, B_mt
, B_lo
, B_up
,
349 B_an
, B_co
, B_ls
, B_or
, B_rs
, B_xo
,
352 /* tokens and their corresponding info values */
354 #define NTC "\377" /* switch to next token class (tc<<1) */
357 static const char tokenlist
[] ALIGN1
=
358 "\1(" NTC
/* TC_SEQSTART */
359 "\1)" NTC
/* TC_SEQTERM */
360 "\1/" NTC
/* TC_REGEXP */
361 "\2>>" "\1>" "\1|" NTC
/* TC_OUTRDR */
362 "\2++" "\2--" NTC
/* TC_UOPPOST */
363 "\2++" "\2--" "\1$" NTC
/* TC_UOPPRE1 */
364 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
365 "\2*=" "\2/=" "\2%=" "\2^="
366 "\1+" "\1-" "\3**=" "\2**"
367 "\1/" "\1%" "\1^" "\1*"
368 "\2!=" "\2>=" "\2<=" "\1>"
369 "\1<" "\2!~" "\1~" "\2&&"
370 "\2||" "\1?" "\1:" NTC
371 "\2in" NTC
/* TC_IN */
372 "\1," NTC
/* TC_COMMA */
373 "\1|" NTC
/* TC_PIPE */
374 "\1+" "\1-" "\1!" NTC
/* TC_UOPPRE2 */
375 "\1]" NTC
/* TC_ARRTERM */
376 "\1{" NTC
/* TC_GRPSTART */
377 "\1}" NTC
/* TC_GRPTERM */
378 "\1;" NTC
/* TC_SEMICOL */
379 "\1\n" NTC
/* TC_NEWLINE */
380 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
381 "\10continue" "\6delete" "\5print"
382 "\6printf" "\4next" "\10nextfile"
383 "\6return" "\4exit" NTC
384 "\5while" NTC
/* TC_WHILE */
385 "\4else" NTC
/* TC_ELSE */
386 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
388 "\5close" "\6system" "\6fflush" "\5atan2"
389 "\3cos" "\3exp" "\3int" "\3log"
390 "\4rand" "\3sin" "\4sqrt" "\5srand"
391 "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
392 "\5match" "\5split" "\7sprintf" "\3sub"
393 "\6substr" "\7systime" "\10strftime" "\6mktime"
394 "\7tolower" "\7toupper" NTC
395 "\6length" NTC
/* TC_LENGTH */
396 "\7getline" NTC
/* TC_GETLINE */
397 "\4func" "\10function" NTC
/* TC_FUNCDECL */
398 "\5BEGIN" NTC
/* TC_BEGIN */
400 /* compiler adds trailing "\0" */
403 #define OC_B OC_BUILTIN
405 static const uint32_t tokeninfo
[] = {
409 xS
|'a', xS
|'w', xS
|'|',
410 OC_UNARY
|xV
|P(9)|'p', OC_UNARY
|xV
|P(9)|'m',
411 OC_UNARY
|xV
|P(9)|'P', OC_UNARY
|xV
|P(9)|'M', OC_FIELD
|xV
|P(5),
412 OC_COMPARE
|VV
|P(39)|5, OC_MOVE
|VV
|P(74), OC_REPLACE
|NV
|P(74)|'+', OC_REPLACE
|NV
|P(74)|'-',
413 OC_REPLACE
|NV
|P(74)|'*', OC_REPLACE
|NV
|P(74)|'/', OC_REPLACE
|NV
|P(74)|'%', OC_REPLACE
|NV
|P(74)|'&',
414 OC_BINARY
|NV
|P(29)|'+', OC_BINARY
|NV
|P(29)|'-', OC_REPLACE
|NV
|P(74)|'&', OC_BINARY
|NV
|P(15)|'&',
415 OC_BINARY
|NV
|P(25)|'/', OC_BINARY
|NV
|P(25)|'%', OC_BINARY
|NV
|P(15)|'&', OC_BINARY
|NV
|P(25)|'*',
416 OC_COMPARE
|VV
|P(39)|4, OC_COMPARE
|VV
|P(39)|3, OC_COMPARE
|VV
|P(39)|0, OC_COMPARE
|VV
|P(39)|1,
417 OC_COMPARE
|VV
|P(39)|2, OC_MATCH
|Sx
|P(45)|'!', OC_MATCH
|Sx
|P(45)|'~', OC_LAND
|Vx
|P(55),
418 OC_LOR
|Vx
|P(59), OC_TERNARY
|Vx
|P(64)|'?', OC_COLON
|xx
|P(67)|':',
419 OC_IN
|SV
|P(49), /* TC_IN */
421 OC_PGETLINE
|SV
|P(37),
422 OC_UNARY
|xV
|P(19)|'+', OC_UNARY
|xV
|P(19)|'-', OC_UNARY
|xV
|P(19)|'!',
428 ST_IF
, ST_DO
, ST_FOR
, OC_BREAK
,
429 OC_CONTINUE
, OC_DELETE
|Vx
, OC_PRINT
,
430 OC_PRINTF
, OC_NEXT
, OC_NEXTFILE
,
431 OC_RETURN
|Vx
, OC_EXIT
|Nx
,
434 OC_B
|B_an
|P(0x83), OC_B
|B_co
|P(0x41), OC_B
|B_ls
|P(0x83), OC_B
|B_or
|P(0x83),
435 OC_B
|B_rs
|P(0x83), OC_B
|B_xo
|P(0x83),
436 OC_FBLTIN
|Sx
|F_cl
, OC_FBLTIN
|Sx
|F_sy
, OC_FBLTIN
|Sx
|F_ff
, OC_B
|B_a2
|P(0x83),
437 OC_FBLTIN
|Nx
|F_co
, OC_FBLTIN
|Nx
|F_ex
, OC_FBLTIN
|Nx
|F_in
, OC_FBLTIN
|Nx
|F_lg
,
438 OC_FBLTIN
|F_rn
, OC_FBLTIN
|Nx
|F_si
, OC_FBLTIN
|Nx
|F_sq
, OC_FBLTIN
|Nx
|F_sr
,
439 OC_B
|B_ge
|P(0xd6), OC_B
|B_gs
|P(0xb6), OC_B
|B_ix
|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
440 OC_B
|B_ma
|P(0x89), OC_B
|B_sp
|P(0x8b), OC_SPRINTF
, OC_B
|B_su
|P(0xb6),
441 OC_B
|B_ss
|P(0x8f), OC_FBLTIN
|F_ti
, OC_B
|B_ti
|P(0x0b), OC_B
|B_mt
|P(0x0b),
442 OC_B
|B_lo
|P(0x49), OC_B
|B_up
|P(0x49),
443 OC_FBLTIN
|Sx
|F_le
, /* TC_LENGTH */
450 /* internal variable names and their initial values */
451 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
453 CONVFMT
, OFMT
, FS
, OFS
,
454 ORS
, RS
, RT
, FILENAME
,
455 SUBSEP
, F0
, ARGIND
, ARGC
,
456 ARGV
, ERRNO
, FNR
, NR
,
457 NF
, IGNORECASE
, ENVIRON
, NUM_INTERNAL_VARS
460 static const char vNames
[] ALIGN1
=
461 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
462 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
463 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
464 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
465 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
467 static const char vValues
[] ALIGN1
=
468 "%.6g\0" "%.6g\0" " \0" " \0"
469 "\n\0" "\n\0" "\0" "\0"
470 "\034\0" "\0" "\377";
472 /* hash size may grow to these values */
473 #define FIRST_PRIME 61
474 static const uint16_t PRIMES
[] ALIGN2
= { 251, 1021, 4093, 16381, 65521 };
477 /* Globals. Split in two parts so that first one is addressed
478 * with (mostly short) negative offsets.
479 * NB: it's unsafe to put members of type "double"
480 * into globals2 (gcc may fail to align them).
484 chain beginseq
, mainseq
, endseq
;
486 node
*break_ptr
, *continue_ptr
;
488 xhash
*vhash
, *ahash
, *fdhash
, *fnhash
;
489 const char *g_progname
;
492 int maxfields
; /* used in fsrealloc() only */
501 smallint is_f0_split
;
505 uint32_t t_info
; /* often used */
510 var
*intvar
[NUM_INTERNAL_VARS
]; /* often used */
512 /* former statics from various functions */
513 char *split_f0__fstrings
;
515 uint32_t next_token__save_tclass
;
516 uint32_t next_token__save_info
;
517 uint32_t next_token__ltclass
;
518 smallint next_token__concat_inserted
;
520 smallint next_input_file__files_happen
;
521 rstream next_input_file__rsm
;
523 var
*evaluate__fnargs
;
524 unsigned evaluate__seed
;
525 regex_t evaluate__sreg
;
529 tsplitter exec_builtin__tspl
;
531 /* biggest and least used members go last */
532 tsplitter fsplitter
, rsplitter
;
534 #define G1 (ptr_to_globals[-1])
535 #define G (*(struct globals2 *)ptr_to_globals)
536 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
537 /*char G1size[sizeof(G1)]; - 0x74 */
538 /*char Gsize[sizeof(G)]; - 0x1c4 */
539 /* Trying to keep most of members accessible with short offsets: */
540 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
541 #define t_double (G1.t_double )
542 #define beginseq (G1.beginseq )
543 #define mainseq (G1.mainseq )
544 #define endseq (G1.endseq )
545 #define seq (G1.seq )
546 #define break_ptr (G1.break_ptr )
547 #define continue_ptr (G1.continue_ptr)
549 #define vhash (G1.vhash )
550 #define ahash (G1.ahash )
551 #define fdhash (G1.fdhash )
552 #define fnhash (G1.fnhash )
553 #define g_progname (G1.g_progname )
554 #define g_lineno (G1.g_lineno )
555 #define nfields (G1.nfields )
556 #define maxfields (G1.maxfields )
557 #define Fields (G1.Fields )
558 #define g_cb (G1.g_cb )
559 #define g_pos (G1.g_pos )
560 #define g_buf (G1.g_buf )
561 #define icase (G1.icase )
562 #define exiting (G1.exiting )
563 #define nextrec (G1.nextrec )
564 #define nextfile (G1.nextfile )
565 #define is_f0_split (G1.is_f0_split )
566 #define t_rollback (G1.t_rollback )
567 #define t_info (G.t_info )
568 #define t_tclass (G.t_tclass )
569 #define t_string (G.t_string )
570 #define t_lineno (G.t_lineno )
571 #define intvar (G.intvar )
572 #define fsplitter (G.fsplitter )
573 #define rsplitter (G.rsplitter )
574 #define INIT_G() do { \
575 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
576 G.next_token__ltclass = TC_OPTERM; \
577 G.evaluate__seed = 1; \
581 /* function prototypes */
582 static void handle_special(var
*);
583 static node
*parse_expr(uint32_t);
584 static void chain_group(void);
585 static var
*evaluate(node
*, var
*);
586 static rstream
*next_input_file(void);
587 static int fmt_num(char *, int, const char *, double, int);
588 static int awk_exit(int) NORETURN
;
590 /* ---- error handling ---- */
592 static const char EMSG_INTERNAL_ERROR
[] ALIGN1
= "Internal error";
593 static const char EMSG_UNEXP_EOS
[] ALIGN1
= "Unexpected end of string";
594 static const char EMSG_UNEXP_TOKEN
[] ALIGN1
= "Unexpected token";
595 static const char EMSG_DIV_BY_ZERO
[] ALIGN1
= "Division by zero";
596 static const char EMSG_INV_FMT
[] ALIGN1
= "Invalid format specifier";
597 static const char EMSG_TOO_FEW_ARGS
[] ALIGN1
= "Too few arguments for builtin";
598 static const char EMSG_NOT_ARRAY
[] ALIGN1
= "Not an array";
599 static const char EMSG_POSSIBLE_ERROR
[] ALIGN1
= "Possible syntax error";
600 static const char EMSG_UNDEF_FUNC
[] ALIGN1
= "Call to undefined function";
601 static const char EMSG_NO_MATH
[] ALIGN1
= "Math support is not compiled in";
603 static void zero_out_var(var
*vp
)
605 memset(vp
, 0, sizeof(*vp
));
608 static void syntax_error(const char *message
) NORETURN
;
609 static void syntax_error(const char *message
)
611 bb_error_msg_and_die("%s:%i: %s", g_progname
, g_lineno
, message
);
614 /* ---- hash stuff ---- */
616 static unsigned hashidx(const char *name
)
621 idx
= *name
++ + (idx
<< 6) - idx
;
625 /* create new hash */
626 static xhash
*hash_init(void)
630 newhash
= xzalloc(sizeof(*newhash
));
631 newhash
->csize
= FIRST_PRIME
;
632 newhash
->items
= xzalloc(FIRST_PRIME
* sizeof(newhash
->items
[0]));
637 /* find item in hash, return ptr to data, NULL if not found */
638 static void *hash_search(xhash
*hash
, const char *name
)
642 hi
= hash
->items
[hashidx(name
) % hash
->csize
];
644 if (strcmp(hi
->name
, name
) == 0)
651 /* grow hash if it becomes too big */
652 static void hash_rebuild(xhash
*hash
)
654 unsigned newsize
, i
, idx
;
655 hash_item
**newitems
, *hi
, *thi
;
657 if (hash
->nprime
== ARRAY_SIZE(PRIMES
))
660 newsize
= PRIMES
[hash
->nprime
++];
661 newitems
= xzalloc(newsize
* sizeof(newitems
[0]));
663 for (i
= 0; i
< hash
->csize
; i
++) {
668 idx
= hashidx(thi
->name
) % newsize
;
669 thi
->next
= newitems
[idx
];
675 hash
->csize
= newsize
;
676 hash
->items
= newitems
;
679 /* find item in hash, add it if necessary. Return ptr to data */
680 static void *hash_find(xhash
*hash
, const char *name
)
686 hi
= hash_search(hash
, name
);
688 if (++hash
->nel
/ hash
->csize
> 10)
691 l
= strlen(name
) + 1;
692 hi
= xzalloc(sizeof(*hi
) + l
);
693 strcpy(hi
->name
, name
);
695 idx
= hashidx(name
) % hash
->csize
;
696 hi
->next
= hash
->items
[idx
];
697 hash
->items
[idx
] = hi
;
703 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
704 #define newvar(name) ((var*) hash_find(vhash, (name)))
705 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
706 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
708 static void hash_remove(xhash
*hash
, const char *name
)
710 hash_item
*hi
, **phi
;
712 phi
= &hash
->items
[hashidx(name
) % hash
->csize
];
715 if (strcmp(hi
->name
, name
) == 0) {
716 hash
->glen
-= (strlen(name
) + 1);
726 /* ------ some useful functions ------ */
728 static char *skip_spaces(char *p
)
731 if (*p
== '\\' && p
[1] == '\n') {
734 } else if (*p
!= ' ' && *p
!= '\t') {
742 /* returns old *s, advances *s past word and terminating NUL */
743 static char *nextword(char **s
)
746 while (*(*s
)++ != '\0')
751 static char nextchar(char **s
)
758 c
= bb_process_escape_sequence((const char**)s
);
759 /* Example awk statement:
761 * we must treat \" as "
763 if (c
== '\\' && *s
== pps
) { /* unrecognized \z? */
764 c
= *(*s
); /* yes, fetch z */
766 (*s
)++; /* advance unless z = NUL */
771 /* TODO: merge with strcpy_and_process_escape_sequences()?
773 static void unescape_string_in_place(char *s1
)
776 while ((*s1
= nextchar(&s
)) != '\0')
780 static ALWAYS_INLINE
int isalnum_(int c
)
782 return (isalnum(c
) || c
== '_');
785 static double my_strtod(char **pp
)
788 if (ENABLE_DESKTOP
&& cp
[0] == '0') {
789 /* Might be hex or octal integer: 0x123abc or 07777 */
790 char c
= (cp
[1] | 0x20);
791 if (c
== 'x' || isdigit(cp
[1])) {
792 unsigned long long ull
= strtoull(cp
, pp
, 0);
796 if (!isdigit(c
) && c
!= '.')
798 /* else: it may be a floating number. Examples:
799 * 009.123 (*pp points to '9')
800 * 000.123 (*pp points to '.')
801 * fall through to strtod.
805 return strtod(cp
, pp
);
808 /* -------- working with variables (set/get/copy/etc) -------- */
810 static xhash
*iamarray(var
*v
)
814 while (a
->type
& VF_CHILD
)
817 if (!(a
->type
& VF_ARRAY
)) {
819 a
->x
.array
= hash_init();
824 static void clear_array(xhash
*array
)
829 for (i
= 0; i
< array
->csize
; i
++) {
830 hi
= array
->items
[i
];
834 free(thi
->data
.v
.string
);
837 array
->items
[i
] = NULL
;
839 array
->glen
= array
->nel
= 0;
842 /* clear a variable */
843 static var
*clrvar(var
*v
)
845 if (!(v
->type
& VF_FSTR
))
848 v
->type
&= VF_DONTTOUCH
;
854 /* assign string value to variable */
855 static var
*setvar_p(var
*v
, char *value
)
863 /* same as setvar_p but make a copy of string */
864 static var
*setvar_s(var
*v
, const char *value
)
866 return setvar_p(v
, (value
&& *value
) ? xstrdup(value
) : NULL
);
869 /* same as setvar_s but sets USER flag */
870 static var
*setvar_u(var
*v
, const char *value
)
872 v
= setvar_s(v
, value
);
877 /* set array element to user string */
878 static void setari_u(var
*a
, int idx
, const char *s
)
882 v
= findvar(iamarray(a
), itoa(idx
));
886 /* assign numeric value to variable */
887 static var
*setvar_i(var
*v
, double value
)
890 v
->type
|= VF_NUMBER
;
896 static const char *getvar_s(var
*v
)
898 /* if v is numeric and has no cached string, convert it to string */
899 if ((v
->type
& (VF_NUMBER
| VF_CACHED
)) == VF_NUMBER
) {
900 fmt_num(g_buf
, MAXVARFMT
, getvar_s(intvar
[CONVFMT
]), v
->number
, TRUE
);
901 v
->string
= xstrdup(g_buf
);
902 v
->type
|= VF_CACHED
;
904 return (v
->string
== NULL
) ? "" : v
->string
;
907 static double getvar_i(var
*v
)
911 if ((v
->type
& (VF_NUMBER
| VF_CACHED
)) == 0) {
915 debug_printf_eval("getvar_i: '%s'->", s
);
916 v
->number
= my_strtod(&s
);
917 debug_printf_eval("%f (s:'%s')\n", v
->number
, s
);
918 if (v
->type
& VF_USER
) {
924 debug_printf_eval("getvar_i: '%s'->zero\n", s
);
927 v
->type
|= VF_CACHED
;
929 debug_printf_eval("getvar_i: %f\n", v
->number
);
933 /* Used for operands of bitwise ops */
934 static unsigned long getvar_i_int(var
*v
)
936 double d
= getvar_i(v
);
938 /* Casting doubles to longs is undefined for values outside
939 * of target type range. Try to widen it as much as possible */
941 return (unsigned long)d
;
942 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
943 return - (long) (unsigned long) (-d
);
946 static var
*copyvar(var
*dest
, const var
*src
)
950 dest
->type
|= (src
->type
& ~(VF_DONTTOUCH
| VF_FSTR
));
951 debug_printf_eval("copyvar: number:%f string:'%s'\n", src
->number
, src
->string
);
952 dest
->number
= src
->number
;
954 dest
->string
= xstrdup(src
->string
);
956 handle_special(dest
);
960 static var
*incvar(var
*v
)
962 return setvar_i(v
, getvar_i(v
) + 1.0);
965 /* return true if v is number or numeric string */
966 static int is_numeric(var
*v
)
969 return ((v
->type
^ VF_DIRTY
) & (VF_NUMBER
| VF_USER
| VF_DIRTY
));
972 /* return 1 when value of v corresponds to true, 0 otherwise */
973 static int istrue(var
*v
)
976 return (v
->number
!= 0);
977 return (v
->string
&& v
->string
[0]);
980 /* temporary variables allocator. Last allocated should be first freed */
981 static var
*nvalloc(int n
)
989 if ((g_cb
->pos
- g_cb
->nv
) + n
<= g_cb
->size
)
995 size
= (n
<= MINNVBLOCK
) ? MINNVBLOCK
: n
;
996 g_cb
= xzalloc(sizeof(nvblock
) + size
* sizeof(var
));
998 g_cb
->pos
= g_cb
->nv
;
1000 /*g_cb->next = NULL; - xzalloc did it */
1008 while (v
< g_cb
->pos
) {
1017 static void nvfree(var
*v
)
1021 if (v
< g_cb
->nv
|| v
>= g_cb
->pos
)
1022 syntax_error(EMSG_INTERNAL_ERROR
);
1024 for (p
= v
; p
< g_cb
->pos
; p
++) {
1025 if ((p
->type
& (VF_ARRAY
| VF_CHILD
)) == VF_ARRAY
) {
1026 clear_array(iamarray(p
));
1027 free(p
->x
.array
->items
);
1030 if (p
->type
& VF_WALK
) {
1032 walker_list
*w
= p
->x
.walker
;
1033 debug_printf_walker("nvfree: freeing walker @%p\n", &p
->x
.walker
);
1037 debug_printf_walker(" free(%p)\n", w
);
1046 while (g_cb
->prev
&& g_cb
->pos
== g_cb
->nv
) {
1051 /* ------- awk program text parsing ------- */
1053 /* Parse the next token pointed to by global pos, place results into global ttt.
1054 * If the token isn't expected, give up. Return the token class
1056 static uint32_t next_token(uint32_t expected
)
1058 #define concat_inserted (G.next_token__concat_inserted)
1059 #define save_tclass (G.next_token__save_tclass)
1060 #define save_info (G.next_token__save_info)
1061 /* Initialized to TC_OPTERM: */
1062 #define ltclass (G.next_token__ltclass)
1071 } else if (concat_inserted
) {
1072 concat_inserted
= FALSE
;
1073 t_tclass
= save_tclass
;
1079 g_lineno
= t_lineno
;
1081 while (*p
!= '\n' && *p
!= '\0')
1089 debug_printf_parse("%s: token found: TC_EOF\n", __func__
);
1090 } else if (*p
== '\"') {
1093 while (*p
!= '\"') {
1095 if (*p
== '\0' || *p
== '\n')
1096 syntax_error(EMSG_UNEXP_EOS
);
1098 *s
++ = nextchar(&pp
);
1104 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__
, t_string
);
1105 } else if ((expected
& TC_REGEXP
) && *p
== '/') {
1109 if (*p
== '\0' || *p
== '\n')
1110 syntax_error(EMSG_UNEXP_EOS
);
1114 s
[-1] = bb_process_escape_sequence((const char **)&pp
);
1126 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__
, t_string
);
1128 } else if (*p
== '.' || isdigit(*p
)) {
1131 t_double
= my_strtod(&pp
);
1134 syntax_error(EMSG_UNEXP_TOKEN
);
1136 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__
, t_double
);
1138 /* search for something known */
1143 int l
= (unsigned char) *tl
++;
1144 if (l
== (unsigned char) NTCC
) {
1148 /* if token class is expected,
1150 * and it's not a longer word,
1152 if ((tc
& (expected
| TC_WORD
| TC_NEWLINE
))
1153 && strncmp(p
, tl
, l
) == 0
1154 && !((tc
& TC_WORD
) && isalnum_(p
[l
]))
1156 /* then this is what we are looking for */
1158 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__
, l
, p
, t_info
);
1165 /* not a known token */
1167 /* is it a name? (var/array/function) */
1169 syntax_error(EMSG_UNEXP_TOKEN
); /* no */
1172 while (isalnum_(*++p
)) {
1177 /* also consume whitespace between functionname and bracket */
1178 if (!(expected
& TC_VARIABLE
) || (expected
& TC_ARRAY
))
1182 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__
, t_string
);
1187 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__
, t_string
);
1189 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__
, t_string
);
1195 /* skipping newlines in some cases */
1196 if ((ltclass
& TC_NOTERM
) && (tc
& TC_NEWLINE
))
1199 /* insert concatenation operator when needed */
1200 if ((ltclass
& TC_CONCAT1
) && (tc
& TC_CONCAT2
) && (expected
& TC_BINOP
)) {
1201 concat_inserted
= TRUE
;
1205 t_info
= OC_CONCAT
| SS
| P(35);
1212 /* Are we ready for this? */
1213 if (!(ltclass
& expected
)) {
1214 syntax_error((ltclass
& (TC_NEWLINE
| TC_EOF
)) ?
1215 EMSG_UNEXP_EOS
: EMSG_UNEXP_TOKEN
);
1219 #undef concat_inserted
1225 static void rollback_token(void)
1230 static node
*new_node(uint32_t info
)
1234 n
= xzalloc(sizeof(node
));
1236 n
->lineno
= g_lineno
;
1240 static void mk_re_node(const char *s
, node
*n
, regex_t
*re
)
1242 n
->info
= OC_REGEXP
;
1245 xregcomp(re
, s
, REG_EXTENDED
);
1246 xregcomp(re
+ 1, s
, REG_EXTENDED
| REG_ICASE
);
1249 static node
*condition(void)
1251 next_token(TC_SEQSTART
);
1252 return parse_expr(TC_SEQTERM
);
1255 /* parse expression terminated by given argument, return ptr
1256 * to built subtree. Terminator is eaten by parse_expr */
1257 static node
*parse_expr(uint32_t iexp
)
1265 debug_printf_parse("%s(%x)\n", __func__
, iexp
);
1268 sn
.r
.n
= glptr
= NULL
;
1269 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
| iexp
;
1271 while (!((tc
= next_token(xtc
)) & iexp
)) {
1273 if (glptr
&& (t_info
== (OC_COMPARE
| VV
| P(39) | 2))) {
1274 /* input redirection (<) attached to glptr node */
1275 debug_printf_parse("%s: input redir\n", __func__
);
1276 cn
= glptr
->l
.n
= new_node(OC_CONCAT
| SS
| P(37));
1278 xtc
= TC_OPERAND
| TC_UOPPRE
;
1281 } else if (tc
& (TC_BINOP
| TC_UOPPOST
)) {
1282 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__
);
1283 /* for binary and postfix-unary operators, jump back over
1284 * previous operators with higher priority */
1286 while (((t_info
& PRIMASK
) > (vn
->a
.n
->info
& PRIMASK2
))
1287 || ((t_info
== vn
->info
) && ((t_info
& OPCLSMASK
) == OC_COLON
))
1291 if ((t_info
& OPCLSMASK
) == OC_TERNARY
)
1293 cn
= vn
->a
.n
->r
.n
= new_node(t_info
);
1295 if (tc
& TC_BINOP
) {
1297 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
;
1298 if ((t_info
& OPCLSMASK
) == OC_PGETLINE
) {
1300 next_token(TC_GETLINE
);
1301 /* give maximum priority to this pipe */
1302 cn
->info
&= ~PRIMASK
;
1303 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1307 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1312 debug_printf_parse("%s: other\n", __func__
);
1313 /* for operands and prefix-unary operators, attach them
1316 cn
= vn
->r
.n
= new_node(t_info
);
1318 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
;
1319 if (tc
& (TC_OPERAND
| TC_REGEXP
)) {
1320 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__
);
1321 xtc
= TC_UOPPRE
| TC_UOPPOST
| TC_BINOP
| TC_OPERAND
| iexp
;
1322 /* one should be very careful with switch on tclass -
1323 * only simple tclasses should be used! */
1327 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__
);
1329 v
= hash_search(ahash
, t_string
);
1331 cn
->info
= OC_FNARG
;
1332 cn
->l
.aidx
= v
->x
.aidx
;
1334 cn
->l
.v
= newvar(t_string
);
1336 if (tc
& TC_ARRAY
) {
1338 cn
->r
.n
= parse_expr(TC_ARRTERM
);
1344 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__
);
1346 v
= cn
->l
.v
= xzalloc(sizeof(var
));
1348 setvar_i(v
, t_double
);
1350 setvar_s(v
, t_string
);
1354 debug_printf_parse("%s: TC_REGEXP\n", __func__
);
1355 mk_re_node(t_string
, cn
, xzalloc(sizeof(regex_t
)*2));
1359 debug_printf_parse("%s: TC_FUNCTION\n", __func__
);
1361 cn
->r
.f
= newfunc(t_string
);
1362 cn
->l
.n
= condition();
1366 debug_printf_parse("%s: TC_SEQSTART\n", __func__
);
1367 cn
= vn
->r
.n
= parse_expr(TC_SEQTERM
);
1369 syntax_error("Empty sequence");
1374 debug_printf_parse("%s: TC_GETLINE\n", __func__
);
1376 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1380 debug_printf_parse("%s: TC_BUILTIN\n", __func__
);
1381 cn
->l
.n
= condition();
1385 debug_printf_parse("%s: TC_LENGTH\n", __func__
);
1386 next_token(TC_SEQSTART
| TC_OPTERM
| TC_GRPTERM
);
1388 if (t_tclass
& TC_SEQSTART
) {
1389 /* It was a "(" token. Handle just like TC_BUILTIN */
1390 cn
->l
.n
= condition();
1398 debug_printf_parse("%s() returns %p\n", __func__
, sn
.r
.n
);
1402 /* add node to chain. Return ptr to alloc'd node */
1403 static node
*chain_node(uint32_t info
)
1408 seq
->first
= seq
->last
= new_node(0);
1410 if (seq
->programname
!= g_progname
) {
1411 seq
->programname
= g_progname
;
1412 n
= chain_node(OC_NEWSOURCE
);
1413 n
->l
.new_progname
= xstrdup(g_progname
);
1418 seq
->last
= n
->a
.n
= new_node(OC_DONE
);
1423 static void chain_expr(uint32_t info
)
1427 n
= chain_node(info
);
1428 n
->l
.n
= parse_expr(TC_OPTERM
| TC_GRPTERM
);
1429 if (t_tclass
& TC_GRPTERM
)
1433 static node
*chain_loop(node
*nn
)
1435 node
*n
, *n2
, *save_brk
, *save_cont
;
1437 save_brk
= break_ptr
;
1438 save_cont
= continue_ptr
;
1440 n
= chain_node(OC_BR
| Vx
);
1441 continue_ptr
= new_node(OC_EXEC
);
1442 break_ptr
= new_node(OC_EXEC
);
1444 n2
= chain_node(OC_EXEC
| Vx
);
1447 continue_ptr
->a
.n
= n2
;
1448 break_ptr
->a
.n
= n
->r
.n
= seq
->last
;
1450 continue_ptr
= save_cont
;
1451 break_ptr
= save_brk
;
1456 /* parse group and attach it to chain */
1457 static void chain_group(void)
1463 c
= next_token(TC_GRPSEQ
);
1464 } while (c
& TC_NEWLINE
);
1466 if (c
& TC_GRPSTART
) {
1467 debug_printf_parse("%s: TC_GRPSTART\n", __func__
);
1468 while (next_token(TC_GRPSEQ
| TC_GRPTERM
) != TC_GRPTERM
) {
1469 debug_printf_parse("%s: !TC_GRPTERM\n", __func__
);
1470 if (t_tclass
& TC_NEWLINE
)
1475 debug_printf_parse("%s: TC_GRPTERM\n", __func__
);
1476 } else if (c
& (TC_OPSEQ
| TC_OPTERM
)) {
1477 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__
);
1479 chain_expr(OC_EXEC
| Vx
);
1482 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__
);
1483 switch (t_info
& OPCLSMASK
) {
1485 debug_printf_parse("%s: ST_IF\n", __func__
);
1486 n
= chain_node(OC_BR
| Vx
);
1487 n
->l
.n
= condition();
1489 n2
= chain_node(OC_EXEC
);
1491 if (next_token(TC_GRPSEQ
| TC_GRPTERM
| TC_ELSE
) == TC_ELSE
) {
1493 n2
->a
.n
= seq
->last
;
1500 debug_printf_parse("%s: ST_WHILE\n", __func__
);
1502 n
= chain_loop(NULL
);
1507 debug_printf_parse("%s: ST_DO\n", __func__
);
1508 n2
= chain_node(OC_EXEC
);
1509 n
= chain_loop(NULL
);
1511 next_token(TC_WHILE
);
1512 n
->l
.n
= condition();
1516 debug_printf_parse("%s: ST_FOR\n", __func__
);
1517 next_token(TC_SEQSTART
);
1518 n2
= parse_expr(TC_SEMICOL
| TC_SEQTERM
);
1519 if (t_tclass
& TC_SEQTERM
) { /* for-in */
1520 if ((n2
->info
& OPCLSMASK
) != OC_IN
)
1521 syntax_error(EMSG_UNEXP_TOKEN
);
1522 n
= chain_node(OC_WALKINIT
| VV
);
1525 n
= chain_loop(NULL
);
1526 n
->info
= OC_WALKNEXT
| Vx
;
1528 } else { /* for (;;) */
1529 n
= chain_node(OC_EXEC
| Vx
);
1531 n2
= parse_expr(TC_SEMICOL
);
1532 n3
= parse_expr(TC_SEQTERM
);
1542 debug_printf_parse("%s: OC_PRINT[F]\n", __func__
);
1543 n
= chain_node(t_info
);
1544 n
->l
.n
= parse_expr(TC_OPTERM
| TC_OUTRDR
| TC_GRPTERM
);
1545 if (t_tclass
& TC_OUTRDR
) {
1547 n
->r
.n
= parse_expr(TC_OPTERM
| TC_GRPTERM
);
1549 if (t_tclass
& TC_GRPTERM
)
1554 debug_printf_parse("%s: OC_BREAK\n", __func__
);
1555 n
= chain_node(OC_EXEC
);
1561 debug_printf_parse("%s: OC_CONTINUE\n", __func__
);
1562 n
= chain_node(OC_EXEC
);
1563 n
->a
.n
= continue_ptr
;
1567 /* delete, next, nextfile, return, exit */
1569 debug_printf_parse("%s: default\n", __func__
);
1575 static void parse_program(char *p
)
1584 while ((tclass
= next_token(TC_EOF
| TC_OPSEQ
| TC_GRPSTART
|
1585 TC_OPTERM
| TC_BEGIN
| TC_END
| TC_FUNCDECL
)) != TC_EOF
) {
1587 if (tclass
& TC_OPTERM
) {
1588 debug_printf_parse("%s: TC_OPTERM\n", __func__
);
1593 if (tclass
& TC_BEGIN
) {
1594 debug_printf_parse("%s: TC_BEGIN\n", __func__
);
1597 } else if (tclass
& TC_END
) {
1598 debug_printf_parse("%s: TC_END\n", __func__
);
1601 } else if (tclass
& TC_FUNCDECL
) {
1602 debug_printf_parse("%s: TC_FUNCDECL\n", __func__
);
1603 next_token(TC_FUNCTION
);
1605 f
= newfunc(t_string
);
1606 f
->body
.first
= NULL
;
1608 while (next_token(TC_VARIABLE
| TC_SEQTERM
) & TC_VARIABLE
) {
1609 v
= findvar(ahash
, t_string
);
1610 v
->x
.aidx
= f
->nargs
++;
1612 if (next_token(TC_COMMA
| TC_SEQTERM
) & TC_SEQTERM
)
1618 } else if (tclass
& TC_OPSEQ
) {
1619 debug_printf_parse("%s: TC_OPSEQ\n", __func__
);
1621 cn
= chain_node(OC_TEST
);
1622 cn
->l
.n
= parse_expr(TC_OPTERM
| TC_EOF
| TC_GRPSTART
);
1623 if (t_tclass
& TC_GRPSTART
) {
1624 debug_printf_parse("%s: TC_GRPSTART\n", __func__
);
1628 debug_printf_parse("%s: !TC_GRPSTART\n", __func__
);
1629 chain_node(OC_PRINT
);
1631 cn
->r
.n
= mainseq
.last
;
1632 } else /* if (tclass & TC_GRPSTART) */ {
1633 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__
);
1638 debug_printf_parse("%s: TC_EOF\n", __func__
);
1642 /* -------- program execution part -------- */
1644 static node
*mk_splitter(const char *s
, tsplitter
*spl
)
1652 if ((n
->info
& OPCLSMASK
) == OC_REGEXP
) {
1654 regfree(ire
); // TODO: nuke ire, use re+1?
1656 if (s
[0] && s
[1]) { /* strlen(s) > 1 */
1657 mk_re_node(s
, n
, re
);
1659 n
->info
= (uint32_t) s
[0];
1665 /* use node as a regular expression. Supplied with node ptr and regex_t
1666 * storage space. Return ptr to regex (if result points to preg, it should
1667 * be later regfree'd manually
1669 static regex_t
*as_regex(node
*op
, regex_t
*preg
)
1675 if ((op
->info
& OPCLSMASK
) == OC_REGEXP
) {
1676 return icase
? op
->r
.ire
: op
->l
.re
;
1679 s
= getvar_s(evaluate(op
, v
));
1681 cflags
= icase
? REG_EXTENDED
| REG_ICASE
: REG_EXTENDED
;
1682 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1683 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1684 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1685 * (maybe gsub is not supposed to use REG_EXTENDED?).
1687 if (regcomp(preg
, s
, cflags
)) {
1688 cflags
&= ~REG_EXTENDED
;
1689 xregcomp(preg
, s
, cflags
);
1695 /* gradually increasing buffer.
1696 * note that we reallocate even if n == old_size,
1697 * and thus there is at least one extra allocated byte.
1699 static char* qrealloc(char *b
, int n
, int *size
)
1701 if (!b
|| n
>= *size
) {
1702 *size
= n
+ (n
>>1) + 80;
1703 b
= xrealloc(b
, *size
);
1708 /* resize field storage space */
1709 static void fsrealloc(int size
)
1713 if (size
>= maxfields
) {
1715 maxfields
= size
+ 16;
1716 Fields
= xrealloc(Fields
, maxfields
* sizeof(Fields
[0]));
1717 for (; i
< maxfields
; i
++) {
1718 Fields
[i
].type
= VF_SPECIAL
;
1719 Fields
[i
].string
= NULL
;
1722 /* if size < nfields, clear extra field variables */
1723 for (i
= size
; i
< nfields
; i
++) {
1729 static int awk_split(const char *s
, node
*spl
, char **slist
)
1734 regmatch_t pmatch
[2]; // TODO: why [2]? [1] is enough...
1736 /* in worst case, each char would be a separate field */
1737 *slist
= s1
= xzalloc(strlen(s
) * 2 + 3);
1740 c
[0] = c
[1] = (char)spl
->info
;
1742 if (*getvar_s(intvar
[RS
]) == '\0')
1746 if ((spl
->info
& OPCLSMASK
) == OC_REGEXP
) { /* regex split */
1748 return n
; /* "": zero fields */
1749 n
++; /* at least one field will be there */
1751 l
= strcspn(s
, c
+2); /* len till next NUL or \n */
1752 if (regexec(icase
? spl
->r
.ire
: spl
->l
.re
, s
, 1, pmatch
, 0) == 0
1753 && pmatch
[0].rm_so
<= l
1755 l
= pmatch
[0].rm_so
;
1756 if (pmatch
[0].rm_eo
== 0) {
1760 n
++; /* we saw yet another delimiter */
1762 pmatch
[0].rm_eo
= l
;
1767 /* make sure we remove *all* of the separator chars */
1770 } while (++l
< pmatch
[0].rm_eo
);
1772 s
+= pmatch
[0].rm_eo
;
1776 if (c
[0] == '\0') { /* null split */
1784 if (c
[0] != ' ') { /* single-character split */
1786 c
[0] = toupper(c
[0]);
1787 c
[1] = tolower(c
[1]);
1791 while ((s1
= strpbrk(s1
, c
)) != NULL
) {
1799 s
= skip_whitespace(s
);
1803 while (*s
&& !isspace(*s
))
1810 static void split_f0(void)
1812 /* static char *fstrings; */
1813 #define fstrings (G.split_f0__fstrings)
1824 n
= awk_split(getvar_s(intvar
[F0
]), &fsplitter
.n
, &fstrings
);
1827 for (i
= 0; i
< n
; i
++) {
1828 Fields
[i
].string
= nextword(&s
);
1829 Fields
[i
].type
|= (VF_FSTR
| VF_USER
| VF_DIRTY
);
1832 /* set NF manually to avoid side effects */
1834 intvar
[NF
]->type
= VF_NUMBER
| VF_SPECIAL
;
1835 intvar
[NF
]->number
= nfields
;
1839 /* perform additional actions when some internal variables changed */
1840 static void handle_special(var
*v
)
1844 const char *sep
, *s
;
1845 int sl
, l
, len
, i
, bsize
;
1847 if (!(v
->type
& VF_SPECIAL
))
1850 if (v
== intvar
[NF
]) {
1851 n
= (int)getvar_i(v
);
1854 /* recalculate $0 */
1855 sep
= getvar_s(intvar
[OFS
]);
1859 for (i
= 0; i
< n
; i
++) {
1860 s
= getvar_s(&Fields
[i
]);
1863 memcpy(b
+len
, sep
, sl
);
1866 b
= qrealloc(b
, len
+l
+sl
, &bsize
);
1867 memcpy(b
+len
, s
, l
);
1872 setvar_p(intvar
[F0
], b
);
1875 } else if (v
== intvar
[F0
]) {
1876 is_f0_split
= FALSE
;
1878 } else if (v
== intvar
[FS
]) {
1880 * The POSIX-2008 standard says that changing FS should have no effect on the
1881 * current input line, but only on the next one. The language is:
1883 * > Before the first reference to a field in the record is evaluated, the record
1884 * > shall be split into fields, according to the rules in Regular Expressions,
1885 * > using the value of FS that was current at the time the record was read.
1887 * So, split up current line before assignment to FS:
1891 mk_splitter(getvar_s(v
), &fsplitter
);
1892 } else if (v
== intvar
[RS
]) {
1893 mk_splitter(getvar_s(v
), &rsplitter
);
1894 } else if (v
== intvar
[IGNORECASE
]) {
1897 n
= getvar_i(intvar
[NF
]);
1898 setvar_i(intvar
[NF
], n
> v
-Fields
? n
: v
-Fields
+1);
1899 /* right here v is invalid. Just to note... */
1903 /* step through func/builtin/etc arguments */
1904 static node
*nextarg(node
**pn
)
1909 if (n
&& (n
->info
& OPCLSMASK
) == OC_COMMA
) {
1918 static void hashwalk_init(var
*v
, xhash
*array
)
1923 walker_list
*prev_walker
;
1925 if (v
->type
& VF_WALK
) {
1926 prev_walker
= v
->x
.walker
;
1931 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker
);
1933 w
= v
->x
.walker
= xzalloc(sizeof(*w
) + array
->glen
+ 1); /* why + 1? */
1934 debug_printf_walker(" walker@%p=%p\n", &v
->x
.walker
, w
);
1935 w
->cur
= w
->end
= w
->wbuf
;
1936 w
->prev
= prev_walker
;
1937 for (i
= 0; i
< array
->csize
; i
++) {
1938 hi
= array
->items
[i
];
1940 strcpy(w
->end
, hi
->name
);
1947 static int hashwalk_next(var
*v
)
1949 walker_list
*w
= v
->x
.walker
;
1951 if (w
->cur
>= w
->end
) {
1952 walker_list
*prev_walker
= w
->prev
;
1954 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v
->x
.walker
, w
, prev_walker
);
1956 v
->x
.walker
= prev_walker
;
1960 setvar_s(v
, nextword(&w
->cur
));
1964 /* evaluate node, return 1 when result is true, 0 otherwise */
1965 static int ptest(node
*pattern
)
1967 /* ptest__v is "static": to save stack space? */
1968 return istrue(evaluate(pattern
, &G
.ptest__v
));
1971 /* read next record from stream rsm into a variable v */
1972 static int awk_getline(rstream
*rsm
, var
*v
)
1975 regmatch_t pmatch
[2];
1976 int size
, a
, p
, pp
= 0;
1977 int fd
, so
, eo
, r
, rp
;
1980 debug_printf_eval("entered %s()\n", __func__
);
1982 /* we're using our own buffer since we need access to accumulating
1985 fd
= fileno(rsm
->F
);
1990 c
= (char) rsplitter
.n
.info
;
1994 m
= qrealloc(m
, 256, &size
);
2001 if ((rsplitter
.n
.info
& OPCLSMASK
) == OC_REGEXP
) {
2002 if (regexec(icase
? rsplitter
.n
.r
.ire
: rsplitter
.n
.l
.re
,
2003 b
, 1, pmatch
, 0) == 0) {
2004 so
= pmatch
[0].rm_so
;
2005 eo
= pmatch
[0].rm_eo
;
2009 } else if (c
!= '\0') {
2010 s
= strchr(b
+pp
, c
);
2012 s
= memchr(b
+pp
, '\0', p
- pp
);
2019 while (b
[rp
] == '\n')
2021 s
= strstr(b
+rp
, "\n\n");
2024 while (b
[eo
] == '\n')
2033 memmove(m
, m
+a
, p
+1);
2038 m
= qrealloc(m
, a
+p
+128, &size
);
2041 p
+= safe_read(fd
, b
+p
, size
-p
-1);
2045 setvar_i(intvar
[ERRNO
], errno
);
2054 c
= b
[so
]; b
[so
] = '\0';
2058 c
= b
[eo
]; b
[eo
] = '\0';
2059 setvar_s(intvar
[RT
], b
+so
);
2068 debug_printf_eval("returning from %s(): %d\n", __func__
, r
);
2073 static int fmt_num(char *b
, int size
, const char *format
, double n
, int int_as_int
)
2077 const char *s
= format
;
2079 if (int_as_int
&& n
== (long long)n
) {
2080 r
= snprintf(b
, size
, "%lld", (long long)n
);
2082 do { c
= *s
; } while (c
&& *++s
);
2083 if (strchr("diouxX", c
)) {
2084 r
= snprintf(b
, size
, format
, (int)n
);
2085 } else if (strchr("eEfgG", c
)) {
2086 r
= snprintf(b
, size
, format
, n
);
2088 syntax_error(EMSG_INV_FMT
);
2094 /* formatted output into an allocated buffer, return ptr to buffer */
2095 static char *awk_printf(node
*n
)
2100 int i
, j
, incr
, bsize
;
2105 fmt
= f
= xstrdup(getvar_s(evaluate(nextarg(&n
), v
)));
2110 while (*f
&& (*f
!= '%' || *++f
== '%'))
2112 while (*f
&& !isalpha(*f
)) {
2114 syntax_error("%*x formats are not supported");
2118 incr
= (f
- s
) + MAXVARFMT
;
2119 b
= qrealloc(b
, incr
+ i
, &bsize
);
2125 arg
= evaluate(nextarg(&n
), v
);
2128 if (c
== 'c' || !c
) {
2129 i
+= sprintf(b
+i
, s
, is_numeric(arg
) ?
2130 (char)getvar_i(arg
) : *getvar_s(arg
));
2131 } else if (c
== 's') {
2133 b
= qrealloc(b
, incr
+i
+strlen(s1
), &bsize
);
2134 i
+= sprintf(b
+i
, s
, s1
);
2136 i
+= fmt_num(b
+i
, incr
, s
, getvar_i(arg
), FALSE
);
2140 /* if there was an error while sprintf, return value is negative */
2147 b
= xrealloc(b
, i
+ 1);
2152 /* Common substitution routine.
2153 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2154 * store result into (dest), return number of substitutions.
2155 * If nm = 0, replace all matches.
2156 * If src or dst is NULL, use $0.
2157 * If subexp != 0, enable subexpression matching (\1-\9).
2159 static int awk_sub(node
*rn
, const char *repl
, int nm
, var
*src
, var
*dest
, int subexp
)
2163 int match_no
, residx
, replen
, resbufsize
;
2165 regmatch_t pmatch
[10];
2166 regex_t sreg
, *regex
;
2172 regex
= as_regex(rn
, &sreg
);
2173 sp
= getvar_s(src
? src
: intvar
[F0
]);
2174 replen
= strlen(repl
);
2175 while (regexec(regex
, sp
, 10, pmatch
, regexec_flags
) == 0) {
2176 int so
= pmatch
[0].rm_so
;
2177 int eo
= pmatch
[0].rm_eo
;
2179 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2180 resbuf
= qrealloc(resbuf
, residx
+ eo
+ replen
, &resbufsize
);
2181 memcpy(resbuf
+ residx
, sp
, eo
);
2183 if (++match_no
>= nm
) {
2188 residx
-= (eo
- so
);
2190 for (s
= repl
; *s
; s
++) {
2191 char c
= resbuf
[residx
++] = *s
;
2196 if (c
== '&' || (subexp
&& c
>= '0' && c
<= '9')) {
2198 residx
-= ((nbs
+ 3) >> 1);
2205 resbuf
[residx
++] = c
;
2207 int n
= pmatch
[j
].rm_eo
- pmatch
[j
].rm_so
;
2208 resbuf
= qrealloc(resbuf
, residx
+ replen
+ n
, &resbufsize
);
2209 memcpy(resbuf
+ residx
, sp
+ pmatch
[j
].rm_so
, n
);
2217 regexec_flags
= REG_NOTBOL
;
2222 /* Empty match (e.g. "b*" will match anywhere).
2223 * Advance by one char. */
2225 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2226 //... and will erroneously match "b" even though it is NOT at the word start.
2227 //we need REG_NOTBOW but it does not exist...
2228 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2229 //it should be able to do it correctly.
2230 /* Subtle: this is safe only because
2231 * qrealloc allocated at least one extra byte */
2232 resbuf
[residx
] = *sp
;
2240 resbuf
= qrealloc(resbuf
, residx
+ strlen(sp
), &resbufsize
);
2241 strcpy(resbuf
+ residx
, sp
);
2243 //bb_error_msg("end sp:'%s'%p", sp,sp);
2244 setvar_p(dest
? dest
: intvar
[F0
], resbuf
);
2250 static NOINLINE
int do_mktime(const char *ds
)
2255 /*memset(&then, 0, sizeof(then)); - not needed */
2256 then
.tm_isdst
= -1; /* default is unknown */
2258 /* manpage of mktime says these fields are ints,
2259 * so we can sscanf stuff directly into them */
2260 count
= sscanf(ds
, "%u %u %u %u %u %u %d",
2261 &then
.tm_year
, &then
.tm_mon
, &then
.tm_mday
,
2262 &then
.tm_hour
, &then
.tm_min
, &then
.tm_sec
,
2266 || (unsigned)then
.tm_mon
< 1
2267 || (unsigned)then
.tm_year
< 1900
2273 then
.tm_year
-= 1900;
2275 return mktime(&then
);
2278 static NOINLINE var
*exec_builtin(node
*op
, var
*res
)
2280 #define tspl (G.exec_builtin__tspl)
2286 regmatch_t pmatch
[2];
2295 isr
= info
= op
->info
;
2298 av
[2] = av
[3] = NULL
;
2299 for (i
= 0; i
< 4 && op
; i
++) {
2300 an
[i
] = nextarg(&op
);
2301 if (isr
& 0x09000000)
2302 av
[i
] = evaluate(an
[i
], &tv
[i
]);
2303 if (isr
& 0x08000000)
2304 as
[i
] = getvar_s(av
[i
]);
2309 if ((uint32_t)nargs
< (info
>> 30))
2310 syntax_error(EMSG_TOO_FEW_ARGS
);
2316 if (ENABLE_FEATURE_AWK_LIBM
)
2317 setvar_i(res
, atan2(getvar_i(av
[0]), getvar_i(av
[1])));
2319 syntax_error(EMSG_NO_MATH
);
2326 spl
= (an
[2]->info
& OPCLSMASK
) == OC_REGEXP
?
2327 an
[2] : mk_splitter(getvar_s(evaluate(an
[2], &tv
[2])), &tspl
);
2332 n
= awk_split(as
[0], spl
, &s
);
2334 clear_array(iamarray(av
[1]));
2335 for (i
= 1; i
<= n
; i
++)
2336 setari_u(av
[1], i
, nextword(&s
));
2346 i
= getvar_i(av
[1]) - 1;
2351 n
= (nargs
> 2) ? getvar_i(av
[2]) : l
-i
;
2354 s
= xstrndup(as
[0]+i
, n
);
2359 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2360 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2362 setvar_i(res
, getvar_i_int(av
[0]) & getvar_i_int(av
[1]));
2366 setvar_i(res
, ~getvar_i_int(av
[0]));
2370 setvar_i(res
, getvar_i_int(av
[0]) << getvar_i_int(av
[1]));
2374 setvar_i(res
, getvar_i_int(av
[0]) | getvar_i_int(av
[1]));
2378 setvar_i(res
, getvar_i_int(av
[0]) >> getvar_i_int(av
[1]));
2382 setvar_i(res
, getvar_i_int(av
[0]) ^ getvar_i_int(av
[1]));
2388 s1
= s
= xstrdup(as
[0]);
2390 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2391 if ((unsigned char)((*s1
| 0x20) - 'a') <= ('z' - 'a'))
2392 *s1
= (info
== B_up
) ? (*s1
& 0xdf) : (*s1
| 0x20);
2402 l
= strlen(as
[0]) - ll
;
2403 if (ll
> 0 && l
>= 0) {
2405 char *s
= strstr(as
[0], as
[1]);
2407 n
= (s
- as
[0]) + 1;
2409 /* this piece of code is terribly slow and
2410 * really should be rewritten
2412 for (i
= 0; i
<= l
; i
++) {
2413 if (strncasecmp(as
[0]+i
, as
[1], ll
) == 0) {
2425 tt
= getvar_i(av
[1]);
2428 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2429 i
= strftime(g_buf
, MAXVARFMT
,
2430 ((nargs
> 0) ? as
[0] : "%a %b %d %H:%M:%S %Z %Y"),
2433 setvar_s(res
, g_buf
);
2437 setvar_i(res
, do_mktime(as
[0]));
2441 re
= as_regex(an
[1], &sreg
);
2442 n
= regexec(re
, as
[0], 1, pmatch
, 0);
2447 pmatch
[0].rm_so
= 0;
2448 pmatch
[0].rm_eo
= -1;
2450 setvar_i(newvar("RSTART"), pmatch
[0].rm_so
);
2451 setvar_i(newvar("RLENGTH"), pmatch
[0].rm_eo
- pmatch
[0].rm_so
);
2452 setvar_i(res
, pmatch
[0].rm_so
);
2458 awk_sub(an
[0], as
[1], getvar_i(av
[2]), av
[3], res
, TRUE
);
2462 setvar_i(res
, awk_sub(an
[0], as
[1], 0, av
[2], av
[2], FALSE
));
2466 setvar_i(res
, awk_sub(an
[0], as
[1], 1, av
[2], av
[2], FALSE
));
2476 * Evaluate node - the heart of the program. Supplied with subtree
2477 * and place where to store result. returns ptr to result.
2479 #define XC(n) ((n) >> 8)
2481 static var
*evaluate(node
*op
, var
*res
)
2483 /* This procedure is recursive so we should count every byte */
2484 #define fnargs (G.evaluate__fnargs)
2485 /* seed is initialized to 1 */
2486 #define seed (G.evaluate__seed)
2487 #define sreg (G.evaluate__sreg)
2492 return setvar_s(res
, NULL
);
2494 debug_printf_eval("entered %s()\n", __func__
);
2502 } L
= L
; /* for compiler */
2513 opn
= (opinfo
& OPNMASK
);
2514 g_lineno
= op
->lineno
;
2516 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo
, opn
);
2518 /* execute inevitable things */
2519 if (opinfo
& OF_RES1
)
2520 L
.v
= evaluate(op1
, v1
);
2521 if (opinfo
& OF_RES2
)
2522 R
.v
= evaluate(op
->r
.n
, v1
+1);
2523 if (opinfo
& OF_STR1
) {
2524 L
.s
= getvar_s(L
.v
);
2525 debug_printf_eval("L.s:'%s'\n", L
.s
);
2527 if (opinfo
& OF_STR2
) {
2528 R
.s
= getvar_s(R
.v
);
2529 debug_printf_eval("R.s:'%s'\n", R
.s
);
2531 if (opinfo
& OF_NUM1
) {
2532 L_d
= getvar_i(L
.v
);
2533 debug_printf_eval("L_d:%f\n", L_d
);
2536 debug_printf_eval("switch(0x%x)\n", XC(opinfo
& OPCLSMASK
));
2537 switch (XC(opinfo
& OPCLSMASK
)) {
2539 /* -- iterative node type -- */
2543 if ((op1
->info
& OPCLSMASK
) == OC_COMMA
) {
2544 /* it's range pattern */
2545 if ((opinfo
& OF_CHECKED
) || ptest(op1
->l
.n
)) {
2546 op
->info
|= OF_CHECKED
;
2547 if (ptest(op1
->r
.n
))
2548 op
->info
&= ~OF_CHECKED
;
2554 op
= ptest(op1
) ? op
->a
.n
: op
->r
.n
;
2558 /* just evaluate an expression, also used as unconditional jump */
2562 /* branch, used in if-else and various loops */
2564 op
= istrue(L
.v
) ? op
->a
.n
: op
->r
.n
;
2567 /* initialize for-in loop */
2568 case XC( OC_WALKINIT
):
2569 hashwalk_init(L
.v
, iamarray(R
.v
));
2572 /* get next array item */
2573 case XC( OC_WALKNEXT
):
2574 op
= hashwalk_next(L
.v
) ? op
->a
.n
: op
->r
.n
;
2577 case XC( OC_PRINT
):
2578 case XC( OC_PRINTF
): {
2582 rstream
*rsm
= newfile(R
.s
);
2585 rsm
->F
= popen(R
.s
, "w");
2587 bb_perror_msg_and_die("popen");
2590 rsm
->F
= xfopen(R
.s
, opn
=='w' ? "w" : "a");
2596 if ((opinfo
& OPCLSMASK
) == OC_PRINT
) {
2598 fputs(getvar_s(intvar
[F0
]), F
);
2601 var
*v
= evaluate(nextarg(&op1
), v1
);
2602 if (v
->type
& VF_NUMBER
) {
2603 fmt_num(g_buf
, MAXVARFMT
, getvar_s(intvar
[OFMT
]),
2607 fputs(getvar_s(v
), F
);
2611 fputs(getvar_s(intvar
[OFS
]), F
);
2614 fputs(getvar_s(intvar
[ORS
]), F
);
2616 } else { /* OC_PRINTF */
2617 char *s
= awk_printf(op1
);
2625 case XC( OC_DELETE
): {
2626 uint32_t info
= op1
->info
& OPCLSMASK
;
2629 if (info
== OC_VAR
) {
2631 } else if (info
== OC_FNARG
) {
2632 v
= &fnargs
[op1
->l
.aidx
];
2634 syntax_error(EMSG_NOT_ARRAY
);
2640 s
= getvar_s(evaluate(op1
->r
.n
, v1
));
2641 hash_remove(iamarray(v
), s
);
2643 clear_array(iamarray(v
));
2648 case XC( OC_NEWSOURCE
):
2649 g_progname
= op
->l
.new_progname
;
2652 case XC( OC_RETURN
):
2656 case XC( OC_NEXTFILE
):
2667 /* -- recursive node type -- */
2671 if (L
.v
== intvar
[NF
])
2675 case XC( OC_FNARG
):
2676 L
.v
= &fnargs
[op
->l
.aidx
];
2678 res
= op
->r
.n
? findvar(iamarray(L
.v
), R
.s
) : L
.v
;
2682 setvar_i(res
, hash_search(iamarray(R
.v
), L
.s
) ? 1 : 0);
2685 case XC( OC_REGEXP
):
2687 L
.s
= getvar_s(intvar
[F0
]);
2690 case XC( OC_MATCH
):
2694 regex_t
*re
= as_regex(op1
, &sreg
);
2695 int i
= regexec(re
, L
.s
, 0, NULL
, 0);
2698 setvar_i(res
, (i
== 0) ^ (opn
== '!'));
2703 debug_printf_eval("MOVE\n");
2704 /* if source is a temporary string, jusk relink it to dest */
2705 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2706 //then L.v ends up being a string, which is wrong
2707 // if (R.v == v1+1 && R.v->string) {
2708 // res = setvar_p(L.v, R.v->string);
2709 // R.v->string = NULL;
2711 res
= copyvar(L
.v
, R
.v
);
2715 case XC( OC_TERNARY
):
2716 if ((op
->r
.n
->info
& OPCLSMASK
) != OC_COLON
)
2717 syntax_error(EMSG_POSSIBLE_ERROR
);
2718 res
= evaluate(istrue(L
.v
) ? op
->r
.n
->l
.n
: op
->r
.n
->r
.n
, res
);
2721 case XC( OC_FUNC
): {
2723 const char *sv_progname
;
2725 /* The body might be empty, still has to eval the args */
2726 if (!op
->r
.n
->info
&& !op
->r
.f
->body
.first
)
2727 syntax_error(EMSG_UNDEF_FUNC
);
2729 vbeg
= v
= nvalloc(op
->r
.f
->nargs
+ 1);
2731 var
*arg
= evaluate(nextarg(&op1
), v1
);
2733 v
->type
|= VF_CHILD
;
2735 if (++v
- vbeg
>= op
->r
.f
->nargs
)
2741 sv_progname
= g_progname
;
2743 res
= evaluate(op
->r
.f
->body
.first
, res
);
2745 g_progname
= sv_progname
;
2752 case XC( OC_GETLINE
):
2753 case XC( OC_PGETLINE
): {
2760 if ((opinfo
& OPCLSMASK
) == OC_PGETLINE
) {
2761 rsm
->F
= popen(L
.s
, "r");
2762 rsm
->is_pipe
= TRUE
;
2764 rsm
->F
= fopen_for_read(L
.s
); /* not xfopen! */
2769 iF
= next_input_file();
2773 if (!rsm
|| !rsm
->F
) {
2774 setvar_i(intvar
[ERRNO
], errno
);
2782 i
= awk_getline(rsm
, R
.v
);
2783 if (i
> 0 && !op1
) {
2784 incvar(intvar
[FNR
]);
2791 /* simple builtins */
2792 case XC( OC_FBLTIN
): {
2793 double R_d
= R_d
; /* for compiler */
2797 R_d
= (long long)L_d
;
2801 R_d
= (double)rand() / (double)RAND_MAX
;
2805 if (ENABLE_FEATURE_AWK_LIBM
) {
2811 if (ENABLE_FEATURE_AWK_LIBM
) {
2817 if (ENABLE_FEATURE_AWK_LIBM
) {
2823 if (ENABLE_FEATURE_AWK_LIBM
) {
2829 if (ENABLE_FEATURE_AWK_LIBM
) {
2834 syntax_error(EMSG_NO_MATH
);
2839 seed
= op1
? (unsigned)L_d
: (unsigned)time(NULL
);
2848 debug_printf_eval("length: L.s:'%s'\n", L
.s
);
2850 L
.s
= getvar_s(intvar
[F0
]);
2851 debug_printf_eval("length: L.s='%s'\n", L
.s
);
2853 else if (L
.v
->type
& VF_ARRAY
) {
2854 R_d
= L
.v
->x
.array
->nel
;
2855 debug_printf_eval("length: array_len:%d\n", L
.v
->x
.array
->nel
);
2863 R_d
= (ENABLE_FEATURE_ALLOW_EXEC
&& L
.s
&& *L
.s
)
2864 ? (system(L
.s
) >> 8) : 0;
2870 } else if (L
.s
&& *L
.s
) {
2871 rstream
*rsm
= newfile(L
.s
);
2881 rsm
= (rstream
*)hash_search(fdhash
, L
.s
);
2882 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm
);
2884 debug_printf_eval("OC_FBLTIN F_cl "
2885 "rsm->is_pipe:%d, ->F:%p\n",
2886 rsm
->is_pipe
, rsm
->F
);
2887 /* Can be NULL if open failed. Example:
2888 * getline line <"doesnt_exist";
2889 * close("doesnt_exist"); <--- here rsm->F is NULL
2892 err
= rsm
->is_pipe
? pclose(rsm
->F
) : fclose(rsm
->F
);
2894 hash_remove(fdhash
, L
.s
);
2897 setvar_i(intvar
[ERRNO
], errno
);
2906 case XC( OC_BUILTIN
):
2907 res
= exec_builtin(op
, res
);
2910 case XC( OC_SPRINTF
):
2911 setvar_p(res
, awk_printf(op1
));
2914 case XC( OC_UNARY
): {
2917 Ld
= R_d
= getvar_i(R
.v
);
2944 case XC( OC_FIELD
): {
2945 int i
= (int)getvar_i(R
.v
);
2952 res
= &Fields
[i
- 1];
2957 /* concatenation (" ") and index joining (",") */
2958 case XC( OC_CONCAT
):
2959 case XC( OC_COMMA
): {
2960 const char *sep
= "";
2961 if ((opinfo
& OPCLSMASK
) == OC_COMMA
)
2962 sep
= getvar_s(intvar
[SUBSEP
]);
2963 setvar_p(res
, xasprintf("%s%s%s", L
.s
, sep
, R
.s
));
2968 setvar_i(res
, istrue(L
.v
) ? ptest(op
->r
.n
) : 0);
2972 setvar_i(res
, istrue(L
.v
) ? 1 : ptest(op
->r
.n
));
2975 case XC( OC_BINARY
):
2976 case XC( OC_REPLACE
): {
2977 double R_d
= getvar_i(R
.v
);
2978 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d
, opn
);
2991 syntax_error(EMSG_DIV_BY_ZERO
);
2995 if (ENABLE_FEATURE_AWK_LIBM
)
2996 L_d
= pow(L_d
, R_d
);
2998 syntax_error(EMSG_NO_MATH
);
3002 syntax_error(EMSG_DIV_BY_ZERO
);
3003 L_d
-= (long long)(L_d
/ R_d
) * R_d
;
3006 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d
);
3007 res
= setvar_i(((opinfo
& OPCLSMASK
) == OC_BINARY
) ? res
: L
.v
, L_d
);
3011 case XC( OC_COMPARE
): {
3012 int i
= i
; /* for compiler */
3015 if (is_numeric(L
.v
) && is_numeric(R
.v
)) {
3016 Ld
= getvar_i(L
.v
) - getvar_i(R
.v
);
3018 const char *l
= getvar_s(L
.v
);
3019 const char *r
= getvar_s(R
.v
);
3020 Ld
= icase
? strcasecmp(l
, r
) : strcmp(l
, r
);
3022 switch (opn
& 0xfe) {
3033 setvar_i(res
, (i
== 0) ^ (opn
& 1));
3038 syntax_error(EMSG_POSSIBLE_ERROR
);
3040 if ((opinfo
& OPCLSMASK
) <= SHIFT_TIL_THIS
)
3042 if ((opinfo
& OPCLSMASK
) >= RECUR_FROM_THIS
)
3049 debug_printf_eval("returning from %s(): %p\n", __func__
, res
);
3057 /* -------- main & co. -------- */
3059 static int awk_exit(int r
)
3070 evaluate(endseq
.first
, &tv
);
3073 /* waiting for children */
3074 for (i
= 0; i
< fdhash
->csize
; i
++) {
3075 hi
= fdhash
->items
[i
];
3077 if (hi
->data
.rs
.F
&& hi
->data
.rs
.is_pipe
)
3078 pclose(hi
->data
.rs
.F
);
3086 /* if expr looks like "var=value", perform assignment and return 1,
3087 * otherwise return 0 */
3088 static int is_assignment(const char *expr
)
3092 if (!isalnum_(*expr
) || (val
= strchr(expr
, '=')) == NULL
) {
3096 exprc
= xstrdup(expr
);
3097 val
= exprc
+ (val
- expr
);
3100 unescape_string_in_place(val
);
3101 setvar_u(newvar(exprc
), val
);
3106 /* switch to next input file */
3107 static rstream
*next_input_file(void)
3109 #define rsm (G.next_input_file__rsm)
3110 #define files_happen (G.next_input_file__files_happen)
3113 const char *fname
, *ind
;
3118 rsm
.pos
= rsm
.adv
= 0;
3121 if (getvar_i(intvar
[ARGIND
])+1 >= getvar_i(intvar
[ARGC
])) {
3128 ind
= getvar_s(incvar(intvar
[ARGIND
]));
3129 fname
= getvar_s(findvar(iamarray(intvar
[ARGV
]), ind
));
3130 if (fname
&& *fname
&& !is_assignment(fname
)) {
3131 F
= xfopen_stdin(fname
);
3136 files_happen
= TRUE
;
3137 setvar_s(intvar
[FILENAME
], fname
);
3144 int awk_main(int argc
, char **argv
) MAIN_EXTERNALLY_VISIBLE
;
3145 int awk_main(int argc
, char **argv
)
3149 llist_t
*list_v
= NULL
;
3150 llist_t
*list_f
= NULL
;
3151 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3152 llist_t
*list_e
= NULL
;
3158 char *vnames
= (char *)vNames
; /* cheat */
3159 char *vvalues
= (char *)vValues
;
3163 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3164 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3165 if (ENABLE_LOCALE_SUPPORT
)
3166 setlocale(LC_NUMERIC
, "C");
3170 /* allocate global buffer */
3171 g_buf
= xmalloc(MAXVARFMT
+ 1);
3173 vhash
= hash_init();
3174 ahash
= hash_init();
3175 fdhash
= hash_init();
3176 fnhash
= hash_init();
3178 /* initialize variables */
3179 for (i
= 0; *vnames
; i
++) {
3180 intvar
[i
] = v
= newvar(nextword(&vnames
));
3181 if (*vvalues
!= '\377')
3182 setvar_s(v
, nextword(&vvalues
));
3186 if (*vnames
== '*') {
3187 v
->type
|= VF_SPECIAL
;
3192 handle_special(intvar
[FS
]);
3193 handle_special(intvar
[RS
]);
3195 newfile("/dev/stdin")->F
= stdin
;
3196 newfile("/dev/stdout")->F
= stdout
;
3197 newfile("/dev/stderr")->F
= stderr
;
3199 /* Huh, people report that sometimes environ is NULL. Oh well. */
3200 if (environ
) for (envp
= environ
; *envp
; envp
++) {
3201 /* environ is writable, thus we don't strdup it needlessly */
3203 char *s1
= strchr(s
, '=');
3206 /* Both findvar and setvar_u take const char*
3207 * as 2nd arg -> environment is not trashed */
3208 setvar_u(findvar(iamarray(intvar
[ENVIRON
]), s
), s1
+ 1);
3212 opt_complementary
= OPTCOMPLSTR_AWK
;
3213 opt
= getopt32(argv
, OPTSTR_AWK
, &opt_F
, &list_v
, &list_f
, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e
,) NULL
);
3217 bb_error_msg("warning: option -W is ignored");
3219 unescape_string_in_place(opt_F
);
3220 setvar_s(intvar
[FS
], opt_F
);
3223 if (!is_assignment(llist_pop(&list_v
)))
3230 g_progname
= llist_pop(&list_f
);
3231 from_file
= xfopen_stdin(g_progname
);
3232 /* one byte is reserved for some trick in next_token */
3233 for (i
= j
= 1; j
> 0; i
+= j
) {
3234 s
= xrealloc(s
, i
+ 4096);
3235 j
= fread(s
+ i
, 1, 4094, from_file
);
3239 parse_program(s
+ 1);
3242 g_progname
= "cmd. line";
3243 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3245 parse_program(llist_pop(&list_e
));
3248 if (!(opt
& (OPT_f
| OPT_e
))) {
3251 parse_program(*argv
++);
3255 /* fill in ARGV array */
3256 setvar_i(intvar
[ARGC
], argc
+ 1);
3257 setari_u(intvar
[ARGV
], 0, "awk");
3260 setari_u(intvar
[ARGV
], ++i
, *argv
++);
3262 evaluate(beginseq
.first
, &tv
);
3263 if (!mainseq
.first
&& !endseq
.first
)
3264 awk_exit(EXIT_SUCCESS
);
3266 /* input file could already be opened in BEGIN block */
3268 iF
= next_input_file();
3270 /* passing through input files */
3273 setvar_i(intvar
[FNR
], 0);
3275 while ((i
= awk_getline(iF
, intvar
[F0
])) > 0) {
3278 incvar(intvar
[FNR
]);
3279 evaluate(mainseq
.first
, &tv
);
3286 syntax_error(strerror(errno
));
3288 iF
= next_input_file();
3291 awk_exit(EXIT_SUCCESS
);