1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
28 #define debug_printf_parse(...) do {} while (0)
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
46 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
47 #define VF_ARRAY 0x0002 /* 1 = it's an array */
49 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
60 typedef struct walker_list
{
63 struct walker_list
*prev
;
68 typedef struct var_s
{
69 unsigned type
; /* flags */
73 int aidx
; /* func arg idx (for compilation stage) */
74 struct xhash_s
*array
; /* array ptr */
75 struct var_s
*parent
; /* for func args, ptr to actual parameter */
76 walker_list
*walker
; /* list of array elements (for..in) */
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
81 typedef struct chain_s
{
84 const char *programname
;
88 typedef struct func_s
{
94 typedef struct rstream_s
{
103 typedef struct hash_item_s
{
105 struct var_s v
; /* variable/array hash */
106 struct rstream_s rs
; /* redirect streams hash */
107 struct func_s f
; /* functions hash */
109 struct hash_item_s
*next
; /* next in chain */
110 char name
[1]; /* really it's longer */
/* Hash table header: a bucket array plus bookkeeping counters. */
113 typedef struct xhash_s
{
114 unsigned nel
; /* number of stored elements */
115 unsigned csize
; /* current number of buckets (hash values are reduced modulo this) */
116 unsigned nprime
; /* index into PRIMES[] of the next size to grow to */
117 unsigned glen
; /* total length of item names; hash_remove() subtracts strlen(name) + 1 per item */
118 struct hash_item_s
**items
; /* bucket array; items in one bucket are chained through hash_item_s.next */
122 typedef struct node_s
{
142 /* Block of temporary variables */
143 typedef struct nvblock_s
{
146 struct nvblock_s
*prev
;
147 struct nvblock_s
*next
;
151 typedef struct tsplitter_s
{
156 /* simple token classes */
157 /* Order and hex values are very important!!! See next_token() */
158 #define TC_SEQSTART 1 /* ( */
159 #define TC_SEQTERM (1 << 1) /* ) */
160 #define TC_REGEXP (1 << 2) /* /.../ */
161 #define TC_OUTRDR (1 << 3) /* | > >> */
162 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
163 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164 #define TC_BINOPX (1 << 6) /* two-opnd operator */
165 #define TC_IN (1 << 7)
166 #define TC_COMMA (1 << 8)
167 #define TC_PIPE (1 << 9) /* input redirection pipe */
168 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169 #define TC_ARRTERM (1 << 11) /* ] */
170 #define TC_GRPSTART (1 << 12) /* { */
171 #define TC_GRPTERM (1 << 13) /* } */
172 #define TC_SEMICOL (1 << 14)
173 #define TC_NEWLINE (1 << 15)
174 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175 #define TC_WHILE (1 << 17)
176 #define TC_ELSE (1 << 18)
177 #define TC_BUILTIN (1 << 19)
178 #define TC_GETLINE (1 << 20)
179 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
180 #define TC_BEGIN (1 << 22)
181 #define TC_END (1 << 23)
182 #define TC_EOF (1 << 24)
183 #define TC_VARIABLE (1 << 25)
184 #define TC_ARRAY (1 << 26)
185 #define TC_FUNCTION (1 << 27)
186 #define TC_STRING (1 << 28)
187 #define TC_NUMBER (1 << 29)
189 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
191 /* combined token classes */
192 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
204 /* discard newlines after these */
205 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
208 /* what can expression begin with */
209 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
219 #define OF_RES1 0x010000
220 #define OF_RES2 0x020000
221 #define OF_STR1 0x040000
222 #define OF_STR2 0x080000
223 #define OF_NUM1 0x100000
224 #define OF_CHECKED 0x200000
226 /* combined operator flags */
229 #define xS (OF_RES2 | OF_STR2)
231 #define VV (OF_RES1 | OF_RES2)
232 #define Nx (OF_RES1 | OF_NUM1)
233 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx (OF_RES1 | OF_STR1)
235 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK 0x007F
241 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
242 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
243 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
248 #define P(x) (x << 24)
249 #define PRIMASK 0x7F000000
250 #define PRIMASK2 0x7E000000
252 /* Operation classes */
254 #define SHIFT_TIL_THIS 0x0600
255 #define RECUR_FROM_THIS 0x1000
258 OC_DELETE
= 0x0100, OC_EXEC
= 0x0200, OC_NEWSOURCE
= 0x0300,
259 OC_PRINT
= 0x0400, OC_PRINTF
= 0x0500, OC_WALKINIT
= 0x0600,
261 OC_BR
= 0x0700, OC_BREAK
= 0x0800, OC_CONTINUE
= 0x0900,
262 OC_EXIT
= 0x0a00, OC_NEXT
= 0x0b00, OC_NEXTFILE
= 0x0c00,
263 OC_TEST
= 0x0d00, OC_WALKNEXT
= 0x0e00,
265 OC_BINARY
= 0x1000, OC_BUILTIN
= 0x1100, OC_COLON
= 0x1200,
266 OC_COMMA
= 0x1300, OC_COMPARE
= 0x1400, OC_CONCAT
= 0x1500,
267 OC_FBLTIN
= 0x1600, OC_FIELD
= 0x1700, OC_FNARG
= 0x1800,
268 OC_FUNC
= 0x1900, OC_GETLINE
= 0x1a00, OC_IN
= 0x1b00,
269 OC_LAND
= 0x1c00, OC_LOR
= 0x1d00, OC_MATCH
= 0x1e00,
270 OC_MOVE
= 0x1f00, OC_PGETLINE
= 0x2000, OC_REGEXP
= 0x2100,
271 OC_REPLACE
= 0x2200, OC_RETURN
= 0x2300, OC_SPRINTF
= 0x2400,
272 OC_TERNARY
= 0x2500, OC_UNARY
= 0x2600, OC_VAR
= 0x2700,
275 ST_IF
= 0x3000, ST_DO
= 0x3100, ST_FOR
= 0x3200,
279 /* simple builtins */
281 F_in
, F_rn
, F_co
, F_ex
, F_lg
, F_si
, F_sq
, F_sr
,
282 F_ti
, F_le
, F_sy
, F_ff
, F_cl
287 B_a2
, B_ix
, B_ma
, B_sp
, B_ss
, B_ti
, B_mt
, B_lo
, B_up
,
289 B_an
, B_co
, B_ls
, B_or
, B_rs
, B_xo
,
292 /* tokens and their corresponding info values */
294 #define NTC "\377" /* switch to next token class (tc<<1) */
297 #define OC_B OC_BUILTIN
299 static const char tokenlist
[] ALIGN1
=
302 "\1/" NTC
/* REGEXP */
303 "\2>>" "\1>" "\1|" NTC
/* OUTRDR */
304 "\2++" "\2--" NTC
/* UOPPOST */
305 "\2++" "\2--" "\1$" NTC
/* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
316 "\1+" "\1-" "\1!" NTC
/* UOPPRE2 */
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
329 "\3and" "\5compl" "\6lshift" "\2or"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
339 "\4func" "\10function" NTC
342 /* compiler adds trailing "\0" */
345 static const uint32_t tokeninfo
[] = {
349 xS
|'a', xS
|'w', xS
|'|',
350 OC_UNARY
|xV
|P(9)|'p', OC_UNARY
|xV
|P(9)|'m',
351 OC_UNARY
|xV
|P(9)|'P', OC_UNARY
|xV
|P(9)|'M', OC_FIELD
|xV
|P(5),
352 OC_COMPARE
|VV
|P(39)|5, OC_MOVE
|VV
|P(74), OC_REPLACE
|NV
|P(74)|'+', OC_REPLACE
|NV
|P(74)|'-',
353 OC_REPLACE
|NV
|P(74)|'*', OC_REPLACE
|NV
|P(74)|'/', OC_REPLACE
|NV
|P(74)|'%', OC_REPLACE
|NV
|P(74)|'&',
354 OC_BINARY
|NV
|P(29)|'+', OC_BINARY
|NV
|P(29)|'-', OC_REPLACE
|NV
|P(74)|'&', OC_BINARY
|NV
|P(15)|'&',
355 OC_BINARY
|NV
|P(25)|'/', OC_BINARY
|NV
|P(25)|'%', OC_BINARY
|NV
|P(15)|'&', OC_BINARY
|NV
|P(25)|'*',
356 OC_COMPARE
|VV
|P(39)|4, OC_COMPARE
|VV
|P(39)|3, OC_COMPARE
|VV
|P(39)|0, OC_COMPARE
|VV
|P(39)|1,
357 OC_COMPARE
|VV
|P(39)|2, OC_MATCH
|Sx
|P(45)|'!', OC_MATCH
|Sx
|P(45)|'~', OC_LAND
|Vx
|P(55),
358 OC_LOR
|Vx
|P(59), OC_TERNARY
|Vx
|P(64)|'?', OC_COLON
|xx
|P(67)|':',
359 OC_IN
|SV
|P(49), /* in */
361 OC_PGETLINE
|SV
|P(37),
362 OC_UNARY
|xV
|P(19)|'+', OC_UNARY
|xV
|P(19)|'-', OC_UNARY
|xV
|P(19)|'!',
368 ST_IF
, ST_DO
, ST_FOR
, OC_BREAK
,
369 OC_CONTINUE
, OC_DELETE
|Vx
, OC_PRINT
,
370 OC_PRINTF
, OC_NEXT
, OC_NEXTFILE
,
371 OC_RETURN
|Vx
, OC_EXIT
|Nx
,
375 OC_B
|B_an
|P(0x83), OC_B
|B_co
|P(0x41), OC_B
|B_ls
|P(0x83), OC_B
|B_or
|P(0x83),
376 OC_B
|B_rs
|P(0x83), OC_B
|B_xo
|P(0x83),
377 OC_FBLTIN
|Sx
|F_cl
, OC_FBLTIN
|Sx
|F_sy
, OC_FBLTIN
|Sx
|F_ff
, OC_B
|B_a2
|P(0x83),
378 OC_FBLTIN
|Nx
|F_co
, OC_FBLTIN
|Nx
|F_ex
, OC_FBLTIN
|Nx
|F_in
, OC_FBLTIN
|Nx
|F_lg
,
379 OC_FBLTIN
|F_rn
, OC_FBLTIN
|Nx
|F_si
, OC_FBLTIN
|Nx
|F_sq
, OC_FBLTIN
|Nx
|F_sr
,
380 OC_B
|B_ge
|P(0xd6), OC_B
|B_gs
|P(0xb6), OC_B
|B_ix
|P(0x9b), OC_FBLTIN
|Sx
|F_le
,
381 OC_B
|B_ma
|P(0x89), OC_B
|B_sp
|P(0x8b), OC_SPRINTF
, OC_B
|B_su
|P(0xb6),
382 OC_B
|B_ss
|P(0x8f), OC_FBLTIN
|F_ti
, OC_B
|B_ti
|P(0x0b), OC_B
|B_mt
|P(0x0b),
383 OC_B
|B_lo
|P(0x49), OC_B
|B_up
|P(0x49),
390 /* internal variable names and their initial values */
391 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
393 CONVFMT
, OFMT
, FS
, OFS
,
394 ORS
, RS
, RT
, FILENAME
,
395 SUBSEP
, F0
, ARGIND
, ARGC
,
396 ARGV
, ERRNO
, FNR
, NR
,
397 NF
, IGNORECASE
, ENVIRON
, NUM_INTERNAL_VARS
400 static const char vNames
[] ALIGN1
=
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
407 static const char vValues
[] ALIGN1
=
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
412 /* hash size may grow to these values */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES
[] ALIGN2
= { 251, 1021, 4093, 16381, 65521 };
417 /* Globals. Split in two parts so that first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
424 chain beginseq
, mainseq
, endseq
;
426 node
*break_ptr
, *continue_ptr
;
428 xhash
*vhash
, *ahash
, *fdhash
, *fnhash
;
429 const char *g_progname
;
432 int maxfields
; /* used in fsrealloc() only */
441 smallint is_f0_split
;
445 uint32_t t_info
; /* often used */
450 var
*intvar
[NUM_INTERNAL_VARS
]; /* often used */
452 /* former statics from various functions */
453 char *split_f0__fstrings
;
455 uint32_t next_token__save_tclass
;
456 uint32_t next_token__save_info
;
457 uint32_t next_token__ltclass
;
458 smallint next_token__concat_inserted
;
460 smallint next_input_file__files_happen
;
461 rstream next_input_file__rsm
;
463 var
*evaluate__fnargs
;
464 unsigned evaluate__seed
;
465 regex_t evaluate__sreg
;
469 tsplitter exec_builtin__tspl
;
471 /* biggest and least used members go last */
472 tsplitter fsplitter
, rsplitter
;
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
481 #define t_double (G1.t_double )
482 #define beginseq (G1.beginseq )
483 #define mainseq (G1.mainseq )
484 #define endseq (G1.endseq )
485 #define seq (G1.seq )
486 #define break_ptr (G1.break_ptr )
487 #define continue_ptr (G1.continue_ptr)
489 #define vhash (G1.vhash )
490 #define ahash (G1.ahash )
491 #define fdhash (G1.fdhash )
492 #define fnhash (G1.fnhash )
493 #define g_progname (G1.g_progname )
494 #define g_lineno (G1.g_lineno )
495 #define nfields (G1.nfields )
496 #define maxfields (G1.maxfields )
497 #define Fields (G1.Fields )
498 #define g_cb (G1.g_cb )
499 #define g_pos (G1.g_pos )
500 #define g_buf (G1.g_buf )
501 #define icase (G1.icase )
502 #define exiting (G1.exiting )
503 #define nextrec (G1.nextrec )
504 #define nextfile (G1.nextfile )
505 #define is_f0_split (G1.is_f0_split )
506 #define t_rollback (G1.t_rollback )
507 #define t_info (G.t_info )
508 #define t_tclass (G.t_tclass )
509 #define t_string (G.t_string )
510 #define t_lineno (G.t_lineno )
511 #define intvar (G.intvar )
512 #define fsplitter (G.fsplitter )
513 #define rsplitter (G.rsplitter )
514 #define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
521 /* function prototypes */
522 static void handle_special(var
*);
523 static node
*parse_expr(uint32_t);
524 static void chain_group(void);
525 static var
*evaluate(node
*, var
*);
526 static rstream
*next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN
;
530 /* ---- error handling ---- */
532 static const char EMSG_INTERNAL_ERROR
[] ALIGN1
= "Internal error";
533 static const char EMSG_UNEXP_EOS
[] ALIGN1
= "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN
[] ALIGN1
= "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO
[] ALIGN1
= "Division by zero";
536 static const char EMSG_INV_FMT
[] ALIGN1
= "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS
[] ALIGN1
= "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY
[] ALIGN1
= "Not an array";
539 static const char EMSG_POSSIBLE_ERROR
[] ALIGN1
= "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC
[] ALIGN1
= "Call to undefined function";
541 static const char EMSG_NO_MATH
[] ALIGN1
= "Math support is not compiled in";
/* Reset the whole var structure pointed to by vp to all-zero bytes. */
543 static void zero_out_var(var
*vp
)
545 memset(vp
, 0, sizeof(*vp
));
/* Report a fatal error and terminate; the prototype is declared NORETURN.
 * The text is emitted through bb_error_msg_and_die() in the form
 * "<g_progname>:<g_lineno>: <message>".
 */
548 static void syntax_error(const char *message
) NORETURN
;
549 static void syntax_error(const char *message
)
551 bb_error_msg_and_die("%s:%i: %s", g_progname
, g_lineno
, message
);
554 /* ---- hash stuff ---- */
/* Compute the hash value of a name; callers reduce it modulo the table size. */
556 static unsigned hashidx(const char *name
)
/* fold in the next character: (idx << 6) - idx is idx * 63, so idx = idx * 63 + *name */
561 idx
= *name
++ + (idx
<< 6) - idx
;
565 /* create new hash: the header and the bucket array both come from
 * xzalloc(), and the table starts out with FIRST_PRIME buckets */
566 static xhash
*hash_init(void)
570 newhash
= xzalloc(sizeof(*newhash
));
571 newhash
->csize
= FIRST_PRIME
;
572 newhash
->items
= xzalloc(FIRST_PRIME
* sizeof(newhash
->items
[0]));
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash
*hash
, const char *name
)
582 hi
= hash
->items
[hashidx(name
) % hash
->csize
];
584 if (strcmp(hi
->name
, name
) == 0)
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash
*hash
)
594 unsigned newsize
, i
, idx
;
595 hash_item
**newitems
, *hi
, *thi
;
597 if (hash
->nprime
== ARRAY_SIZE(PRIMES
))
600 newsize
= PRIMES
[hash
->nprime
++];
601 newitems
= xzalloc(newsize
* sizeof(newitems
[0]));
603 for (i
= 0; i
< hash
->csize
; i
++) {
608 idx
= hashidx(thi
->name
) % newsize
;
609 thi
->next
= newitems
[idx
];
615 hash
->csize
= newsize
;
616 hash
->items
= newitems
;
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash
*hash
, const char *name
)
626 hi
= hash_search(hash
, name
);
628 if (++hash
->nel
/ hash
->csize
> 10)
631 l
= strlen(name
) + 1;
632 hi
= xzalloc(sizeof(*hi
) + l
);
633 strcpy(hi
->name
, name
);
635 idx
= hashidx(name
) % hash
->csize
;
636 hi
->next
= hash
->items
[idx
];
637 hash
->items
[idx
] = hi
;
643 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
644 #define newvar(name) ((var*) hash_find(vhash, (name)))
645 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
648 static void hash_remove(xhash
*hash
, const char *name
)
650 hash_item
*hi
, **phi
;
652 phi
= &hash
->items
[hashidx(name
) % hash
->csize
];
655 if (strcmp(hi
->name
, name
) == 0) {
656 hash
->glen
-= (strlen(name
) + 1);
666 /* ------ some useful functions ------ */
668 static char *skip_spaces(char *p
)
671 if (*p
== '\\' && p
[1] == '\n') {
674 } else if (*p
!= ' ' && *p
!= '\t') {
682 /* returns old *s, advances *s past word and terminating NUL */
683 static char *nextword(char **s
)
686 while (*(*s
)++ != '\0')
691 static char nextchar(char **s
)
698 c
= bb_process_escape_sequence((const char**)s
);
699 /* Example awk statement:
701 * we must treat \" as "
703 if (c
== '\\' && *s
== pps
) { /* unrecognized \z? */
704 c
= *(*s
); /* yes, fetch z */
706 (*s
)++; /* advance unless z = NUL */
711 /* TODO: merge with strcpy_and_process_escape_sequences()?
713 static void unescape_string_in_place(char *s1
)
716 while ((*s1
= nextchar(&s
)) != '\0')
720 static ALWAYS_INLINE
int isalnum_(int c
)
722 return (isalnum(c
) || c
== '_');
725 static double my_strtod(char **pp
)
728 if (ENABLE_DESKTOP
&& cp
[0] == '0') {
729 /* Might be hex or octal integer: 0x123abc or 07777 */
730 char c
= (cp
[1] | 0x20);
731 if (c
== 'x' || isdigit(cp
[1])) {
732 unsigned long long ull
= strtoull(cp
, pp
, 0);
736 if (!isdigit(c
) && c
!= '.')
738 /* else: it may be a floating number. Examples:
739 * 009.123 (*pp points to '9')
740 * 000.123 (*pp points to '.')
741 * fall through to strtod.
745 return strtod(cp
, pp
);
748 /* -------- working with variables (set/get/copy/etc) -------- */
750 static xhash
*iamarray(var
*v
)
754 while (a
->type
& VF_CHILD
)
757 if (!(a
->type
& VF_ARRAY
)) {
759 a
->x
.array
= hash_init();
764 static void clear_array(xhash
*array
)
769 for (i
= 0; i
< array
->csize
; i
++) {
770 hi
= array
->items
[i
];
774 free(thi
->data
.v
.string
);
777 array
->items
[i
] = NULL
;
779 array
->glen
= array
->nel
= 0;
782 /* clear a variable */
783 static var
*clrvar(var
*v
)
785 if (!(v
->type
& VF_FSTR
))
788 v
->type
&= VF_DONTTOUCH
;
794 /* assign string value to variable */
795 static var
*setvar_p(var
*v
, char *value
)
803 /* same as setvar_p, but stores a copy of the string made with xstrdup();
 * a NULL or empty value is passed to setvar_p as NULL instead of being copied */
804 static var
*setvar_s(var
*v
, const char *value
)
806 return setvar_p(v
, (value
&& *value
) ? xstrdup(value
) : NULL
);
809 /* same as setvar_s but sets USER flag */
810 static var
*setvar_u(var
*v
, const char *value
)
812 v
= setvar_s(v
, value
);
817 /* set array element to user string */
818 static void setari_u(var
*a
, int idx
, const char *s
)
822 v
= findvar(iamarray(a
), itoa(idx
));
826 /* assign numeric value to variable */
827 static var
*setvar_i(var
*v
, double value
)
830 v
->type
|= VF_NUMBER
;
/* Return the string value of v; "" is returned in place of a NULL string. */
836 static const char *getvar_s(var
*v
)
838 /* if v is numeric and has no cached string, convert it to string */
839 if ((v
->type
& (VF_NUMBER
| VF_CACHED
)) == VF_NUMBER
) {
/* format the number into g_buf using the string value of CONVFMT */
840 fmt_num(g_buf
, MAXVARFMT
, getvar_s(intvar
[CONVFMT
]), v
->number
, TRUE
);
/* keep a copy of the text and set VF_CACHED so the test above fails next time */
841 v
->string
= xstrdup(g_buf
);
842 v
->type
|= VF_CACHED
;
844 return (v
->string
== NULL
) ? "" : v
->string
;
847 static double getvar_i(var
*v
)
851 if ((v
->type
& (VF_NUMBER
| VF_CACHED
)) == 0) {
855 debug_printf_eval("getvar_i: '%s'->", s
);
856 v
->number
= my_strtod(&s
);
857 debug_printf_eval("%f (s:'%s')\n", v
->number
, s
);
858 if (v
->type
& VF_USER
) {
864 debug_printf_eval("getvar_i: '%s'->zero\n", s
);
867 v
->type
|= VF_CACHED
;
869 debug_printf_eval("getvar_i: %f\n", v
->number
);
873 /* Used for operands of bitwise ops */
874 static unsigned long getvar_i_int(var
*v
)
876 double d
= getvar_i(v
);
878 /* Casting doubles to longs is undefined for values outside
879 * of target type range. Try to widen it as much as possible */
881 return (unsigned long)d
;
882 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883 return - (long) (unsigned long) (-d
);
886 static var
*copyvar(var
*dest
, const var
*src
)
890 dest
->type
|= (src
->type
& ~(VF_DONTTOUCH
| VF_FSTR
));
891 debug_printf_eval("copyvar: number:%f string:'%s'\n", src
->number
, src
->string
);
892 dest
->number
= src
->number
;
894 dest
->string
= xstrdup(src
->string
);
896 handle_special(dest
);
/* Increment v: read its numeric value with getvar_i(), add 1.0 and store
 * the sum back with setvar_i(), whose return value is passed through. */
900 static var
*incvar(var
*v
)
902 return setvar_i(v
, getvar_i(v
) + 1.0);
905 /* return true if v is number or numeric string */
906 static int is_numeric(var
*v
)
/* The XOR flips VF_DIRTY before masking, so the result is nonzero when
 * VF_NUMBER is set, VF_USER is set, or VF_DIRTY is clear. */
909 return ((v
->type
^ VF_DIRTY
) & (VF_NUMBER
| VF_USER
| VF_DIRTY
));
912 /* return 1 when value of v corresponds to true, 0 otherwise */
913 static int istrue(var
*v
)
916 return (v
->number
!= 0);
917 return (v
->string
&& v
->string
[0]);
920 /* temporary variables allocator. Last allocated should be first freed */
921 static var
*nvalloc(int n
)
929 if ((g_cb
->pos
- g_cb
->nv
) + n
<= g_cb
->size
)
935 size
= (n
<= MINNVBLOCK
) ? MINNVBLOCK
: n
;
936 g_cb
= xzalloc(sizeof(nvblock
) + size
* sizeof(var
));
938 g_cb
->pos
= g_cb
->nv
;
940 /*g_cb->next = NULL; - xzalloc did it */
948 while (v
< g_cb
->pos
) {
957 static void nvfree(var
*v
)
961 if (v
< g_cb
->nv
|| v
>= g_cb
->pos
)
962 syntax_error(EMSG_INTERNAL_ERROR
);
964 for (p
= v
; p
< g_cb
->pos
; p
++) {
965 if ((p
->type
& (VF_ARRAY
| VF_CHILD
)) == VF_ARRAY
) {
966 clear_array(iamarray(p
));
967 free(p
->x
.array
->items
);
970 if (p
->type
& VF_WALK
) {
972 walker_list
*w
= p
->x
.walker
;
973 debug_printf_walker("nvfree: freeing walker @%p\n", &p
->x
.walker
);
977 debug_printf_walker(" free(%p)\n", w
);
986 while (g_cb
->prev
&& g_cb
->pos
== g_cb
->nv
) {
991 /* ------- awk program text parsing ------- */
993 /* Parse next token pointed by global pos, place results into global ttt.
994 * If token isn't expected, give away. Return token class
996 static uint32_t next_token(uint32_t expected
)
998 #define concat_inserted (G.next_token__concat_inserted)
999 #define save_tclass (G.next_token__save_tclass)
1000 #define save_info (G.next_token__save_info)
1001 /* Initialized to TC_OPTERM: */
1002 #define ltclass (G.next_token__ltclass)
1012 } else if (concat_inserted
) {
1013 concat_inserted
= FALSE
;
1014 t_tclass
= save_tclass
;
1021 g_lineno
= t_lineno
;
1023 while (*p
!= '\n' && *p
!= '\0')
1031 debug_printf_parse("%s: token found: TC_EOF\n", __func__
);
1033 } else if (*p
== '\"') {
1036 while (*p
!= '\"') {
1038 if (*p
== '\0' || *p
== '\n')
1039 syntax_error(EMSG_UNEXP_EOS
);
1041 *s
++ = nextchar(&pp
);
1047 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__
, t_string
);
1049 } else if ((expected
& TC_REGEXP
) && *p
== '/') {
1053 if (*p
== '\0' || *p
== '\n')
1054 syntax_error(EMSG_UNEXP_EOS
);
1058 s
[-1] = bb_process_escape_sequence((const char **)&pp
);
1070 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__
, t_string
);
1072 } else if (*p
== '.' || isdigit(*p
)) {
1075 t_double
= my_strtod(&pp
);
1078 syntax_error(EMSG_UNEXP_TOKEN
);
1080 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__
, t_double
);
1083 /* search for something known */
1088 int l
= (unsigned char) *tl
++;
1089 if (l
== (unsigned char) NTCC
) {
1093 /* if token class is expected,
1095 * and it's not a longer word,
1097 if ((tc
& (expected
| TC_WORD
| TC_NEWLINE
))
1098 && strncmp(p
, tl
, l
) == 0
1099 && !((tc
& TC_WORD
) && isalnum_(p
[l
]))
1101 /* then this is what we are looking for */
1103 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__
, l
, p
, t_info
);
1110 /* not a known token */
1112 /* is it a name? (var/array/function) */
1114 syntax_error(EMSG_UNEXP_TOKEN
); /* no */
1117 while (isalnum_(*++p
)) {
1122 /* also consume whitespace between functionname and bracket */
1123 if (!(expected
& TC_VARIABLE
) || (expected
& TC_ARRAY
))
1127 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__
, t_string
);
1132 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__
, t_string
);
1134 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__
, t_string
);
1140 /* skipping newlines in some cases */
1141 if ((ltclass
& TC_NOTERM
) && (tc
& TC_NEWLINE
))
1144 /* insert concatenation operator when needed */
1145 if ((ltclass
& TC_CONCAT1
) && (tc
& TC_CONCAT2
) && (expected
& TC_BINOP
)) {
1146 concat_inserted
= TRUE
;
1150 t_info
= OC_CONCAT
| SS
| P(35);
1157 /* Are we ready for this? */
1158 if (!(ltclass
& expected
))
1159 syntax_error((ltclass
& (TC_NEWLINE
| TC_EOF
)) ?
1160 EMSG_UNEXP_EOS
: EMSG_UNEXP_TOKEN
);
1163 #undef concat_inserted
1169 static void rollback_token(void)
1174 static node
*new_node(uint32_t info
)
1178 n
= xzalloc(sizeof(node
));
1180 n
->lineno
= g_lineno
;
/* Mark node n as OC_REGEXP and compile pattern s twice into the array re:
 * re[0] with REG_EXTENDED only, re[1] additionally with REG_ICASE.
 * re must therefore have room for two regex_t entries. */
1184 static void mk_re_node(const char *s
, node
*n
, regex_t
*re
)
1186 n
->info
= OC_REGEXP
;
1189 xregcomp(re
, s
, REG_EXTENDED
);
1190 xregcomp(re
+ 1, s
, REG_EXTENDED
| REG_ICASE
);
/* Parse a parenthesized expression: require a TC_SEQSTART token "(",
 * then return the subtree parse_expr() builds up to the closing
 * TC_SEQTERM ")". */
1193 static node
*condition(void)
1195 next_token(TC_SEQSTART
);
1196 return parse_expr(TC_SEQTERM
);
1199 /* parse expression terminated by given argument, return ptr
1200 * to built subtree. Terminator is eaten by parse_expr */
1201 static node
*parse_expr(uint32_t iexp
)
1209 debug_printf_parse("%s(%x)\n", __func__
, iexp
);
1212 sn
.r
.n
= glptr
= NULL
;
1213 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
| iexp
;
1215 while (!((tc
= next_token(xtc
)) & iexp
)) {
1217 if (glptr
&& (t_info
== (OC_COMPARE
| VV
| P(39) | 2))) {
1218 /* input redirection (<) attached to glptr node */
1219 debug_printf_parse("%s: input redir\n", __func__
);
1220 cn
= glptr
->l
.n
= new_node(OC_CONCAT
| SS
| P(37));
1222 xtc
= TC_OPERAND
| TC_UOPPRE
;
1225 } else if (tc
& (TC_BINOP
| TC_UOPPOST
)) {
1226 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__
);
1227 /* for binary and postfix-unary operators, jump back over
1228 * previous operators with higher priority */
1230 while (((t_info
& PRIMASK
) > (vn
->a
.n
->info
& PRIMASK2
))
1231 || ((t_info
== vn
->info
) && ((t_info
& OPCLSMASK
) == OC_COLON
))
1235 if ((t_info
& OPCLSMASK
) == OC_TERNARY
)
1237 cn
= vn
->a
.n
->r
.n
= new_node(t_info
);
1239 if (tc
& TC_BINOP
) {
1241 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
;
1242 if ((t_info
& OPCLSMASK
) == OC_PGETLINE
) {
1244 next_token(TC_GETLINE
);
1245 /* give maximum priority to this pipe */
1246 cn
->info
&= ~PRIMASK
;
1247 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1251 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1256 debug_printf_parse("%s: other\n", __func__
);
1257 /* for operands and prefix-unary operators, attach them
1260 cn
= vn
->r
.n
= new_node(t_info
);
1262 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_REGEXP
;
1263 if (tc
& (TC_OPERAND
| TC_REGEXP
)) {
1264 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__
);
1265 xtc
= TC_UOPPRE
| TC_UOPPOST
| TC_BINOP
| TC_OPERAND
| iexp
;
1266 /* one should be very careful with switch on tclass -
1267 * only simple tclasses should be used! */
1271 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__
);
1273 v
= hash_search(ahash
, t_string
);
1275 cn
->info
= OC_FNARG
;
1276 cn
->l
.aidx
= v
->x
.aidx
;
1278 cn
->l
.v
= newvar(t_string
);
1280 if (tc
& TC_ARRAY
) {
1282 cn
->r
.n
= parse_expr(TC_ARRTERM
);
1288 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__
);
1290 v
= cn
->l
.v
= xzalloc(sizeof(var
));
1292 setvar_i(v
, t_double
);
1294 setvar_s(v
, t_string
);
1298 debug_printf_parse("%s: TC_REGEXP\n", __func__
);
1299 mk_re_node(t_string
, cn
, xzalloc(sizeof(regex_t
)*2));
1303 debug_printf_parse("%s: TC_FUNCTION\n", __func__
);
1305 cn
->r
.f
= newfunc(t_string
);
1306 cn
->l
.n
= condition();
1310 debug_printf_parse("%s: TC_SEQSTART\n", __func__
);
1311 cn
= vn
->r
.n
= parse_expr(TC_SEQTERM
);
1313 syntax_error("Empty sequence");
1318 debug_printf_parse("%s: TC_GETLINE\n", __func__
);
1320 xtc
= TC_OPERAND
| TC_UOPPRE
| TC_BINOP
| iexp
;
1324 debug_printf_parse("%s: TC_BUILTIN\n", __func__
);
1325 cn
->l
.n
= condition();
1332 debug_printf_parse("%s() returns %p\n", __func__
, sn
.r
.n
);
1336 /* add node to chain. Return ptr to alloc'd node */
1337 static node
*chain_node(uint32_t info
)
1342 seq
->first
= seq
->last
= new_node(0);
1344 if (seq
->programname
!= g_progname
) {
1345 seq
->programname
= g_progname
;
1346 n
= chain_node(OC_NEWSOURCE
);
1347 n
->l
.new_progname
= xstrdup(g_progname
);
1352 seq
->last
= n
->a
.n
= new_node(OC_DONE
);
1357 static void chain_expr(uint32_t info
)
1361 n
= chain_node(info
);
1362 n
->l
.n
= parse_expr(TC_OPTERM
| TC_GRPTERM
);
1363 if (t_tclass
& TC_GRPTERM
)
1367 static node
*chain_loop(node
*nn
)
1369 node
*n
, *n2
, *save_brk
, *save_cont
;
1371 save_brk
= break_ptr
;
1372 save_cont
= continue_ptr
;
1374 n
= chain_node(OC_BR
| Vx
);
1375 continue_ptr
= new_node(OC_EXEC
);
1376 break_ptr
= new_node(OC_EXEC
);
1378 n2
= chain_node(OC_EXEC
| Vx
);
1381 continue_ptr
->a
.n
= n2
;
1382 break_ptr
->a
.n
= n
->r
.n
= seq
->last
;
1384 continue_ptr
= save_cont
;
1385 break_ptr
= save_brk
;
1390 /* parse group and attach it to chain */
// Parses one statement or brace group and chains the resulting nodes.
// Visible flow: tokens are read with next_token(TC_GRPSEQ) while the result
// has TC_NEWLINE set; a TC_GRPSTART token loops until next_token() returns
// TC_GRPTERM; a TC_OPSEQ or TC_OPTERM token goes to chain_expr(OC_EXEC | Vx);
// anything else is treated as a statement and dispatched on
// t_info & OPCLSMASK.
1391 static void chain_group(void)
1397 c
= next_token(TC_GRPSEQ
);
1398 } while (c
& TC_NEWLINE
);
1400 if (c
& TC_GRPSTART
) {
1401 debug_printf_parse("%s: TC_GRPSTART\n", __func__
);
1402 while (next_token(TC_GRPSEQ
| TC_GRPTERM
) != TC_GRPTERM
) {
1403 debug_printf_parse("%s: !TC_GRPTERM\n", __func__
);
1404 if (t_tclass
& TC_NEWLINE
)
1409 debug_printf_parse("%s: TC_GRPTERM\n", __func__
);
1410 } else if (c
& (TC_OPSEQ
| TC_OPTERM
)) {
1411 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__
);
1413 chain_expr(OC_EXEC
| Vx
);
1416 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__
);
1417 switch (t_info
& OPCLSMASK
) {
// if: an OC_BR | Vx branch node; the parsed condition is stored in n->l.n
1419 debug_printf_parse("%s: ST_IF\n", __func__
);
1420 n
= chain_node(OC_BR
| Vx
);
1421 n
->l
.n
= condition();
1423 n2
= chain_node(OC_EXEC
);
1425 if (next_token(TC_GRPSEQ
| TC_GRPTERM
| TC_ELSE
) == TC_ELSE
) {
1427 n2
->a
.n
= seq
->last
;
// while: the loop node comes from chain_loop(NULL)
1434 debug_printf_parse("%s: ST_WHILE\n", __func__
);
1436 n
= chain_loop(NULL
);
// do-while: the condition is parsed after the TC_WHILE token and stored in n->l.n
1441 debug_printf_parse("%s: ST_DO\n", __func__
);
1442 n2
= chain_node(OC_EXEC
);
1443 n
= chain_loop(NULL
);
1445 next_token(TC_WHILE
);
1446 n
->l
.n
= condition();
// for: the first expression either ends at TC_SEQTERM (for-in, which must be
// an OC_IN node) or at a semicolon (three-part form)
1450 debug_printf_parse("%s: ST_FOR\n", __func__
);
1451 next_token(TC_SEQSTART
);
1452 n2
= parse_expr(TC_SEMICOL
| TC_SEQTERM
);
1453 if (t_tclass
& TC_SEQTERM
) { /* for-in */
1454 if ((n2
->info
& OPCLSMASK
) != OC_IN
)
1455 syntax_error(EMSG_UNEXP_TOKEN
);
1456 n
= chain_node(OC_WALKINIT
| VV
);
1459 n
= chain_loop(NULL
);
1460 n
->info
= OC_WALKNEXT
| Vx
;
1462 } else { /* for (;;) */
1463 n
= chain_node(OC_EXEC
| Vx
);
1465 n2
= parse_expr(TC_SEMICOL
);
1466 n3
= parse_expr(TC_SEQTERM
);
// print and printf: expression list in n->l.n; n->r.n is parsed only when the
// terminating token class has TC_OUTRDR set
1476 debug_printf_parse("%s: OC_PRINT[F]\n", __func__
);
1477 n
= chain_node(t_info
);
1478 n
->l
.n
= parse_expr(TC_OPTERM
| TC_OUTRDR
| TC_GRPTERM
);
1479 if (t_tclass
& TC_OUTRDR
) {
1481 n
->r
.n
= parse_expr(TC_OPTERM
| TC_GRPTERM
);
1483 if (t_tclass
& TC_GRPTERM
)
1488 debug_printf_parse("%s: OC_BREAK\n", __func__
);
1489 n
= chain_node(OC_EXEC
);
// continue: an OC_EXEC node whose a.n is the current continue_ptr
1494 debug_printf_parse("%s: OC_CONTINUE\n", __func__
);
1495 n
= chain_node(OC_EXEC
);
1496 n
->a
.n
= continue_ptr
;
1499 /* delete, next, nextfile, return, exit */
1501 debug_printf_parse("%s: default\n", __func__
);
// Top-level parse loop.
// The visible loop keeps calling next_token() until it returns TC_EOF and
// checks the returned class for TC_OPTERM, TC_BEGIN, TC_END, TC_FUNCDECL and
// TC_OPSEQ; the final else branch is the brace-group case, per its own comment.
1507 static void parse_program(char *p
)
1516 while ((tclass
= next_token(TC_EOF
| TC_OPSEQ
| TC_GRPSTART
|
1517 TC_OPTERM
| TC_BEGIN
| TC_END
| TC_FUNCDECL
)) != TC_EOF
) {
1519 if (tclass
& TC_OPTERM
) {
1520 debug_printf_parse("%s: TC_OPTERM\n", __func__
);
1525 if (tclass
& TC_BEGIN
) {
1526 debug_printf_parse("%s: TC_BEGIN\n", __func__
);
1530 } else if (tclass
& TC_END
) {
1531 debug_printf_parse("%s: TC_END\n", __func__
);
// function declaration: newfunc(t_string) creates the function entry, then
// each TC_VARIABLE parameter is looked up in ahash and its aidx is set to
// f->nargs++
1535 } else if (tclass
& TC_FUNCDECL
) {
1536 debug_printf_parse("%s: TC_FUNCDECL\n", __func__
);
1537 next_token(TC_FUNCTION
);
1539 f
= newfunc(t_string
);
1540 f
->body
.first
= NULL
;
1542 while (next_token(TC_VARIABLE
| TC_SEQTERM
) & TC_VARIABLE
) {
1543 v
= findvar(ahash
, t_string
);
1544 v
->x
.aidx
= f
->nargs
++;
1546 if (next_token(TC_COMMA
| TC_SEQTERM
) & TC_SEQTERM
)
// pattern rule: an OC_TEST node with the parsed pattern in l.n; an OC_PRINT
// node is chained on the !TC_GRPSTART path, and r.n is set to mainseq.last
1553 } else if (tclass
& TC_OPSEQ
) {
1554 debug_printf_parse("%s: TC_OPSEQ\n", __func__
);
1556 cn
= chain_node(OC_TEST
);
1557 cn
->l
.n
= parse_expr(TC_OPTERM
| TC_EOF
| TC_GRPSTART
);
1558 if (t_tclass
& TC_GRPSTART
) {
1559 debug_printf_parse("%s: TC_GRPSTART\n", __func__
);
1563 debug_printf_parse("%s: !TC_GRPSTART\n", __func__
);
1564 chain_node(OC_PRINT
);
1566 cn
->r
.n
= mainseq
.last
;
1568 } else /* if (tclass & TC_GRPSTART) */ {
1569 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__
);
1574 debug_printf_parse("%s: TC_EOF\n", __func__
);
1578 /* -------- program execution part -------- */
// Builds a splitter from the separator string s.
// If the node currently has class OC_REGEXP, a previously compiled regex is
// released with regfree(). A separator of two or more characters is handed to
// mk_re_node(s, n, re); the other visible assignment stores the single
// character s[0] in n->info.
1580 static node
*mk_splitter(const char *s
, tsplitter
*spl
)
1588 if ((n
->info
& OPCLSMASK
) == OC_REGEXP
) {
1590 regfree(ire
); // TODO: nuke ire, use re+1?
1592 if (s
[0] && s
[1]) { /* strlen(s) > 1 */
1593 mk_re_node(s
, n
, re
);
1595 n
->info
= (uint32_t) s
[0];
1601 /* use node as a regular expression. Supplied with node ptr and regex_t
1602 * storage space. Return ptr to regex (if result points to preg, it should
1603 * be later regfree'd manually)
// Returns a compiled regex for op.
// OC_REGEXP nodes already carry compiled regexes: op->r.ire is returned when
// icase is set, op->l.re otherwise. Any other node is evaluated to a string
// and compiled into preg with REG_EXTENDED (plus REG_ICASE when icase is set);
// if regcomp() reports failure, xregcomp() is called with REG_EXTENDED cleared.
1605 static regex_t
*as_regex(node
*op
, regex_t
*preg
)
1611 if ((op
->info
& OPCLSMASK
) == OC_REGEXP
) {
1612 return icase
? op
->r
.ire
: op
->l
.re
;
1615 s
= getvar_s(evaluate(op
, v
));
1617 cflags
= icase
? REG_EXTENDED
| REG_ICASE
: REG_EXTENDED
;
1618 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621 * (maybe gsub is not supposed to use REG_EXTENDED?).
1623 if (regcomp(preg
, s
, cflags
)) {
1624 cflags
&= ~REG_EXTENDED
;
1625 xregcomp(preg
, s
, cflags
);
1631 /* gradually increasing buffer.
1632 * note that we reallocate even if n == old_size,
1633 * and thus there is at least one extra allocated byte.
// Grows buffer b on demand.
// When b is NULL or n >= *size, *size becomes n + (n >> 1) + 80 and b is
// passed through xrealloc() with that new size; *size is updated in place so
// the caller keeps track of the current capacity.
1635 static char* qrealloc(char *b
, int n
, int *size
)
1637 if (!b
|| n
>= *size
) {
1638 *size
= n
+ (n
>>1) + 80;
1639 b
= xrealloc(b
, *size
);
1644 /* resize field storage space */
// Makes room for size field variables.
// When size >= maxfields, maxfields becomes size + 16, Fields is xrealloc()ed
// to that many entries, and the loop running up to the new maxfields gives
// each visited entry type = VF_SPECIAL and string = NULL.
// A second loop runs over indexes size .. nfields-1, i.e. fields beyond the
// requested size.
1645 static void fsrealloc(int size
)
1649 if (size
>= maxfields
) {
1651 maxfields
= size
+ 16;
1652 Fields
= xrealloc(Fields
, maxfields
* sizeof(Fields
[0]));
1653 for (; i
< maxfields
; i
++) {
1654 Fields
[i
].type
= VF_SPECIAL
;
1655 Fields
[i
].string
= NULL
;
1658 /* if size < nfields, clear extra field variables */
1659 for (i
= size
; i
< nfields
; i
++) {
// Splits s according to splitter node spl.
// The pieces go into a zeroed buffer of strlen(s) * 2 + 3 bytes that is
// allocated here and handed back through *slist; n counts the fields.
// Visible strategies: regex split when spl has class OC_REGEXP (using the
// r.ire regex when icase is set, l.re otherwise), null split when the
// separator character is NUL, single-character split via strpbrk() when it is
// not a space, and whitespace splitting (skip_whitespace / isspace) otherwise.
1665 static int awk_split(const char *s
, node
*spl
, char **slist
)
1670 regmatch_t pmatch
[2]; // TODO: why [2]? [1] is enough...
1672 /* in worst case, each char would be a separate field */
1673 *slist
= s1
= xzalloc(strlen(s
) * 2 + 3);
1676 c
[0] = c
[1] = (char)spl
->info
;
1678 if (*getvar_s(intvar
[RS
]) == '\0')
1682 if ((spl
->info
& OPCLSMASK
) == OC_REGEXP
) { /* regex split */
1684 return n
; /* "": zero fields */
1685 n
++; /* at least one field will be there */
1687 l
= strcspn(s
, c
+2); /* len till next NUL or \n */
1688 if (regexec(icase
? spl
->r
.ire
: spl
->l
.re
, s
, 1, pmatch
, 0) == 0
1689 && pmatch
[0].rm_so
<= l
1691 l
= pmatch
[0].rm_so
;
1692 if (pmatch
[0].rm_eo
== 0) {
1696 n
++; /* we saw yet another delimiter */
1698 pmatch
[0].rm_eo
= l
;
1703 /* make sure we remove *all* of the separator chars */
1706 } while (++l
< pmatch
[0].rm_eo
);
1708 s
+= pmatch
[0].rm_eo
;
1712 if (c
[0] == '\0') { /* null split */
1720 if (c
[0] != ' ') { /* single-character split */
1722 c
[0] = toupper(c
[0]);
1723 c
[1] = tolower(c
[1]);
1727 while ((s1
= strpbrk(s1
, c
)) != NULL
) {
1735 s
= skip_whitespace(s
);
1739 while (*s
&& !isspace(*s
))
// Splits the current record (intvar[F0]) into fields.
// awk_split() is called with the fsplitter node and the fstrings buffer
// (a macro for G.split_f0__fstrings). Each resulting word becomes
// Fields[i].string and the entry is flagged VF_FSTR | VF_USER | VF_DIRTY.
// NF is then written directly (type and number fields) instead of going
// through a setter, to avoid side effects as the in-code comment says.
1746 static void split_f0(void)
1748 /* static char *fstrings; */
1749 #define fstrings (G.split_f0__fstrings)
1760 n
= awk_split(getvar_s(intvar
[F0
]), &fsplitter
.n
, &fstrings
);
1763 for (i
= 0; i
< n
; i
++) {
1764 Fields
[i
].string
= nextword(&s
);
1765 Fields
[i
].type
|= (VF_FSTR
| VF_USER
| VF_DIRTY
);
1768 /* set NF manually to avoid side effects */
1770 intvar
[NF
]->type
= VF_NUMBER
| VF_SPECIAL
;
1771 intvar
[NF
]->number
= nfields
;
1775 /* perform additional actions when some internal variables changed */
// Reacts to a change of a variable that carries VF_SPECIAL; the first visible
// test checks for variables without that flag.
// Visible cases, selected by comparing v against intvar[] entries:
//   NF         - the record is rebuilt by joining Fields[0..n-1] with OFS and
//                stored into intvar[F0] with setvar_p()
//   F0         - is_f0_split is reset to FALSE
//   FS and RS  - fsplitter / rsplitter are rebuilt through mk_splitter()
//   IGNORECASE - handling is outside this excerpt
//   otherwise  - NF is raised to v - Fields + 1 unless n > v - Fields,
//                where n is the current NF
1776 static void handle_special(var
*v
)
1780 const char *sep
, *s
;
1781 int sl
, l
, len
, i
, bsize
;
1783 if (!(v
->type
& VF_SPECIAL
))
1786 if (v
== intvar
[NF
]) {
1787 n
= (int)getvar_i(v
);
1790 /* recalculate $0 */
1791 sep
= getvar_s(intvar
[OFS
]);
1795 for (i
= 0; i
< n
; i
++) {
1796 s
= getvar_s(&Fields
[i
]);
1799 memcpy(b
+len
, sep
, sl
);
1802 b
= qrealloc(b
, len
+l
+sl
, &bsize
);
1803 memcpy(b
+len
, s
, l
);
1808 setvar_p(intvar
[F0
], b
);
1811 } else if (v
== intvar
[F0
]) {
1812 is_f0_split
= FALSE
;
1814 } else if (v
== intvar
[FS
]) {
1816 * The POSIX-2008 standard says that changing FS should have no effect on the
1817 * current input line, but only on the next one. The language is:
1819 * > Before the first reference to a field in the record is evaluated, the record
1820 * > shall be split into fields, according to the rules in Regular Expressions,
1821 * > using the value of FS that was current at the time the record was read.
1823 * So, split up current line before assignment to FS:
1827 mk_splitter(getvar_s(v
), &fsplitter
);
1829 } else if (v
== intvar
[RS
]) {
1830 mk_splitter(getvar_s(v
), &rsplitter
);
1832 } else if (v
== intvar
[IGNORECASE
]) {
1836 n
= getvar_i(intvar
[NF
]);
1837 setvar_i(intvar
[NF
], n
> v
-Fields
? n
: v
-Fields
+1);
1838 /* right here v is invalid. Just to note... */
1842 /* step through func/builtin/etc arguments */
// Argument-list iterator working on *pn.
// The only visible logic tests whether node n is non-NULL and has class
// OC_COMMA; the remainder of the body is outside this excerpt.
1843 static node
*nextarg(node
**pn
)
1848 if (n
&& (n
->info
& OPCLSMASK
) == OC_COMMA
) {
// Starts a walk over array, keeping the state in v->x.walker.
// If v already has VF_WALK set, its current walker is remembered as
// prev_walker and later linked through w->prev. The new walker is allocated
// zeroed, with array->glen + 1 extra bytes after the header; cur and end both
// start at w->wbuf. The loop over array->items[0 .. csize-1] copies item
// names into the buffer at w->end with strcpy().
1857 static void hashwalk_init(var
*v
, xhash
*array
)
1862 walker_list
*prev_walker
;
1864 if (v
->type
& VF_WALK
) {
1865 prev_walker
= v
->x
.walker
;
1870 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker
);
1872 w
= v
->x
.walker
= xzalloc(sizeof(*w
) + array
->glen
+ 1); /* why + 1? */
1873 debug_printf_walker(" walker@%p=%p\n", &v
->x
.walker
, w
);
1874 w
->cur
= w
->end
= w
->wbuf
;
1875 w
->prev
= prev_walker
;
1876 for (i
= 0; i
< array
->csize
; i
++) {
1877 hi
= array
->items
[i
];
1879 strcpy(w
->end
, hi
->name
);
// Advances the walk stored in v->x.walker.
// When w->cur has reached w->end the iteration is over and v->x.walker is
// reset to the previous walker (w->prev). The other visible statement sets v
// to the next stored name, taken with nextword(&w->cur).
1886 static int hashwalk_next(var
*v
)
1888 walker_list
*w
= v
->x
.walker
;
1890 if (w
->cur
>= w
->end
) {
1891 walker_list
*prev_walker
= w
->prev
;
1893 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v
->x
.walker
, w
, prev_walker
);
1895 v
->x
.walker
= prev_walker
;
1899 setvar_s(v
, nextword(&w
->cur
));
1903 /* evaluate node, return 1 when result is true, 0 otherwise */
// Thin wrapper: evaluates pattern into the shared temporary G.ptest__v and
// converts the resulting variable to a truth value with istrue().
1904 static int ptest(node
*pattern
)
1906 /* ptest__v is "static": to save stack space? */
1907 return istrue(evaluate(pattern
, &G
.ptest__v
));
1910 /* read next record from stream rsm into a variable v */
// Reads the next record from rsm into v, using a private buffer and the raw
// descriptor obtained from fileno(rsm->F).
// The record terminator is searched according to rsplitter: a regexec() match
// when its node has class OC_REGEXP, strchr() when the separator character c
// is not NUL, and otherwise memchr() for a NUL byte plus a search for two
// consecutive newlines. More input is appended with safe_read() after the
// buffer has been grown with qrealloc(). ERRNO is updated from errno, and RT
// is set from b+so after b[eo] has been overwritten with NUL.
1911 static int awk_getline(rstream
*rsm
, var
*v
)
1914 regmatch_t pmatch
[2];
1915 int size
, a
, p
, pp
= 0;
1916 int fd
, so
, eo
, r
, rp
;
1919 debug_printf_eval("entered %s()\n", __func__
);
1921 /* we're using our own buffer since we need access to accumulating
1924 fd
= fileno(rsm
->F
);
1929 c
= (char) rsplitter
.n
.info
;
1933 m
= qrealloc(m
, 256, &size
);
1940 if ((rsplitter
.n
.info
& OPCLSMASK
) == OC_REGEXP
) {
1941 if (regexec(icase
? rsplitter
.n
.r
.ire
: rsplitter
.n
.l
.re
,
1942 b
, 1, pmatch
, 0) == 0) {
1943 so
= pmatch
[0].rm_so
;
1944 eo
= pmatch
[0].rm_eo
;
1948 } else if (c
!= '\0') {
1949 s
= strchr(b
+pp
, c
);
1951 s
= memchr(b
+pp
, '\0', p
- pp
);
1958 while (b
[rp
] == '\n')
1960 s
= strstr(b
+rp
, "\n\n");
1963 while (b
[eo
] == '\n')
1972 memmove(m
, m
+a
, p
+1);
1977 m
= qrealloc(m
, a
+p
+128, &size
);
1980 p
+= safe_read(fd
, b
+p
, size
-p
-1);
1984 setvar_i(intvar
[ERRNO
], errno
);
1993 c
= b
[so
]; b
[so
] = '\0';
1997 c
= b
[eo
]; b
[eo
] = '\0';
1998 setvar_s(intvar
[RT
], b
+so
);
2007 debug_printf_eval("returning from %s(): %d\n", __func__
, r
);
// Formats number n into b (bounded by size, via snprintf) and keeps the
// snprintf result in r.
// If int_as_int is set and n equals its int truncation, plain %d is used.
// In the other visible path the scan loop leaves c holding the last character
// of format: one of diouxX formats (int)n, one of eEfgG formats n as a
// double, and the remaining visible call is syntax_error(EMSG_INV_FMT).
// NOTE(review): strchr() also matches the terminating NUL of its string
// argument, so an empty format (c == NUL) would take the integer branch -
// confirm this is intended.
// NOTE(review): (int)n is not defined by the C standard when n is outside the
// int range - confirm callers never pass such values.
2012 static int fmt_num(char *b
, int size
, const char *format
, double n
, int int_as_int
)
2016 const char *s
= format
;
2018 if (int_as_int
&& n
== (int)n
) {
2019 r
= snprintf(b
, size
, "%d", (int)n
);
2021 do { c
= *s
; } while (c
&& *++s
);
2022 if (strchr("diouxX", c
)) {
2023 r
= snprintf(b
, size
, format
, (int)n
);
2024 } else if (strchr("eEfgG", c
)) {
2025 r
= snprintf(b
, size
, format
, n
);
2027 syntax_error(EMSG_INV_FMT
);
2033 /* formatted output into an allocated buffer, return ptr to buffer */
// Builds the formatted output string for the argument list n.
// The first argument is evaluated and duplicated (xstrdup) as the format.
// The scan skips text up to a percent sign that is not doubled, then advances
// to the first alphabetic character of the conversion. Before formatting, the
// buffer is grown with qrealloc() by the segment length plus MAXVARFMT.
// Conversions visible here: c (or a missing letter) prints one character -
// the numeric value cast to char for numeric arguments, else the first char
// of the string; s grows the buffer by strlen of the argument string and
// prints it; the remaining visible call formats through fmt_num().
// A syntax_error message states that %*x formats are not supported.
// Finally the buffer is resized with xrealloc() to i + 1 bytes.
2034 static char *awk_printf(node
*n
)
2039 int i
, j
, incr
, bsize
;
2044 fmt
= f
= xstrdup(getvar_s(evaluate(nextarg(&n
), v
)));
2049 while (*f
&& (*f
!= '%' || *++f
== '%'))
2051 while (*f
&& !isalpha(*f
)) {
2053 syntax_error("%*x formats are not supported");
2057 incr
= (f
- s
) + MAXVARFMT
;
2058 b
= qrealloc(b
, incr
+ i
, &bsize
);
2064 arg
= evaluate(nextarg(&n
), v
);
2067 if (c
== 'c' || !c
) {
2068 i
+= sprintf(b
+i
, s
, is_numeric(arg
) ?
2069 (char)getvar_i(arg
) : *getvar_s(arg
));
2070 } else if (c
== 's') {
2072 b
= qrealloc(b
, incr
+i
+strlen(s1
), &bsize
);
2073 i
+= sprintf(b
+i
, s
, s1
);
2075 i
+= fmt_num(b
+i
, incr
, s
, getvar_i(arg
), FALSE
);
2079 /* if there was an error while sprintf, return value is negative */
2086 b
= xrealloc(b
, i
+ 1);
2091 /* Common substitution routine.
2092 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2093 * store result into (dest), return number of substitutions.
2094 * If nm = 0, replace all matches.
2095 * If src or dest is NULL, use $0.
2096 * If subexp != 0, enable subexpression matching (\1-\9).
// Substitution engine shared by the callers in exec_builtin().
// The regex comes from as_regex(rn, &sreg); the subject string is src, or
// intvar[F0] when src is NULL. Matching uses regexec() with 10 match slots.
// The result is accumulated in resbuf, grown with qrealloc(), and finally
// handed to setvar_p() on dest, or on intvar[F0] when dest is NULL.
// While copying the replacement text, an ampersand - or a digit when subexp
// is set - is treated specially; the visible copy takes
// pmatch[j].rm_eo - pmatch[j].rm_so bytes from sp + pmatch[j].rm_so.
// Inside the loop regexec_flags is set to REG_NOTBOL; the empty-match path
// copies one character (*sp), relying on the extra byte qrealloc() provides.
2098 static int awk_sub(node
*rn
, const char *repl
, int nm
, var
*src
, var
*dest
, int subexp
)
2102 int match_no
, residx
, replen
, resbufsize
;
2104 regmatch_t pmatch
[10];
2105 regex_t sreg
, *regex
;
2111 regex
= as_regex(rn
, &sreg
);
2112 sp
= getvar_s(src
? src
: intvar
[F0
]);
2113 replen
= strlen(repl
);
2114 while (regexec(regex
, sp
, 10, pmatch
, regexec_flags
) == 0) {
2115 int so
= pmatch
[0].rm_so
;
2116 int eo
= pmatch
[0].rm_eo
;
2118 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2119 resbuf
= qrealloc(resbuf
, residx
+ eo
+ replen
, &resbufsize
);
2120 memcpy(resbuf
+ residx
, sp
, eo
);
2122 if (++match_no
>= nm
) {
2127 residx
-= (eo
- so
);
2129 for (s
= repl
; *s
; s
++) {
2130 char c
= resbuf
[residx
++] = *s
;
2135 if (c
== '&' || (subexp
&& c
>= '0' && c
<= '9')) {
2137 residx
-= ((nbs
+ 3) >> 1);
2144 resbuf
[residx
++] = c
;
2146 int n
= pmatch
[j
].rm_eo
- pmatch
[j
].rm_so
;
2147 resbuf
= qrealloc(resbuf
, residx
+ replen
+ n
, &resbufsize
);
2148 memcpy(resbuf
+ residx
, sp
+ pmatch
[j
].rm_so
, n
);
2156 regexec_flags
= REG_NOTBOL
;
2161 /* Empty match (e.g. "b*" will match anywhere).
2162 * Advance by one char. */
2164 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2165 //... and will erroneously match "b" even though it is NOT at the word start.
2166 //we need REG_NOTBOW but it does not exist...
2167 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2168 //it should be able to do it correctly.
2169 /* Subtle: this is safe only because
2170 * qrealloc allocated at least one extra byte */
2171 resbuf
[residx
] = *sp
;
2179 resbuf
= qrealloc(resbuf
, residx
+ strlen(sp
), &resbufsize
);
2180 strcpy(resbuf
+ residx
, sp
);
2182 //bb_error_msg("end sp:'%s'%p", sp,sp);
2183 setvar_p(dest
? dest
: intvar
[F0
], resbuf
);
// Parses the date specification ds with sscanf() into the fields of then
// (presumably a struct tm - its declaration is outside this excerpt) and
// returns mktime(&then).
// tm_isdst defaults to -1 (unknown). The year is read as a full year and has
// 1900 subtracted before the mktime() call. The visible validity tests check
// tm_mon < 1 and tm_year < 1900, both after a cast to unsigned.
// NOTE(review): the first six conversions are %u while, per the in-code
// comment, the target fields are ints; %d is the strictly matching specifier.
2189 static NOINLINE
int do_mktime(const char *ds
)
2194 /*memset(&then, 0, sizeof(then)); - not needed */
2195 then
.tm_isdst
= -1; /* default is unknown */
2197 /* manpage of mktime says these fields are ints,
2198 * so we can sscanf stuff directly into them */
2199 count
= sscanf(ds
, "%u %u %u %u %u %u %d",
2200 &then
.tm_year
, &then
.tm_mon
, &then
.tm_mday
,
2201 &then
.tm_hour
, &then
.tm_min
, &then
.tm_sec
,
2205 || (unsigned)then
.tm_mon
< 1
2206 || (unsigned)then
.tm_year
< 1900
2212 then
.tm_year
-= 1900;
2214 return mktime(&then
);
// Executes a builtin function node op and stores the result in res.
// Up to four arguments are collected with nextarg() into an[]; depending on
// bits of op->info they are also evaluated into av[] (mask 0x09000000) and
// converted to strings in as[] (mask 0x08000000). The value info >> 30 is
// compared against nargs, and too few arguments raise EMSG_TOO_FEW_ARGS.
// Operations visible in this excerpt include atan2 (only with
// ENABLE_FEATURE_AWK_LIBM), splitting into an array via awk_split(),
// substring extraction with xstrndup(), bitwise operations on
// getvar_i_int() values, ASCII-only case conversion, substring search
// (strstr, or a strncasecmp loop), strftime(), do_mktime(), regex matching
// that sets RSTART and RLENGTH, and three awk_sub() call forms.
2217 static NOINLINE var
*exec_builtin(node
*op
, var
*res
)
2219 #define tspl (G.exec_builtin__tspl)
2225 regmatch_t pmatch
[2];
2234 isr
= info
= op
->info
;
2237 av
[2] = av
[3] = NULL
;
2238 for (i
= 0; i
< 4 && op
; i
++) {
2239 an
[i
] = nextarg(&op
);
2240 if (isr
& 0x09000000)
2241 av
[i
] = evaluate(an
[i
], &tv
[i
]);
2242 if (isr
& 0x08000000)
2243 as
[i
] = getvar_s(av
[i
]);
2248 if ((uint32_t)nargs
< (info
>> 30))
2249 syntax_error(EMSG_TOO_FEW_ARGS
);
2255 if (ENABLE_FEATURE_AWK_LIBM
)
2256 setvar_i(res
, atan2(getvar_i(av
[0]), getvar_i(av
[1])));
2258 syntax_error(EMSG_NO_MATH
);
2265 spl
= (an
[2]->info
& OPCLSMASK
) == OC_REGEXP
?
2266 an
[2] : mk_splitter(getvar_s(evaluate(an
[2], &tv
[2])), &tspl
);
2271 n
= awk_split(as
[0], spl
, &s
);
2273 clear_array(iamarray(av
[1]));
2274 for (i
= 1; i
<= n
; i
++)
2275 setari_u(av
[1], i
, nextword(&s
));
2285 i
= getvar_i(av
[1]) - 1;
2290 n
= (nargs
> 2) ? getvar_i(av
[2]) : l
-i
;
2293 s
= xstrndup(as
[0]+i
, n
);
2298 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2299 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2301 setvar_i(res
, getvar_i_int(av
[0]) & getvar_i_int(av
[1]));
2305 setvar_i(res
, ~getvar_i_int(av
[0]));
2309 setvar_i(res
, getvar_i_int(av
[0]) << getvar_i_int(av
[1]));
2313 setvar_i(res
, getvar_i_int(av
[0]) | getvar_i_int(av
[1]));
2317 setvar_i(res
, getvar_i_int(av
[0]) >> getvar_i_int(av
[1]));
2321 setvar_i(res
, getvar_i_int(av
[0]) ^ getvar_i_int(av
[1]));
2327 s1
= s
= xstrdup(as
[0]);
2329 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2330 if ((unsigned char)((*s1
| 0x20) - 'a') <= ('z' - 'a'))
2331 *s1
= (info
== B_up
) ? (*s1
& 0xdf) : (*s1
| 0x20);
2341 l
= strlen(as
[0]) - ll
;
2342 if (ll
> 0 && l
>= 0) {
2344 char *s
= strstr(as
[0], as
[1]);
2346 n
= (s
- as
[0]) + 1;
2348 /* this piece of code is terribly slow and
2349 * really should be rewritten
2351 for (i
= 0; i
<= l
; i
++) {
2352 if (strncasecmp(as
[0]+i
, as
[1], ll
) == 0) {
2364 tt
= getvar_i(av
[1]);
2367 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2368 i
= strftime(g_buf
, MAXVARFMT
,
2369 ((nargs
> 0) ? as
[0] : "%a %b %d %H:%M:%S %Z %Y"),
2372 setvar_s(res
, g_buf
);
2376 setvar_i(res
, do_mktime(as
[0]));
2380 re
= as_regex(an
[1], &sreg
);
2381 n
= regexec(re
, as
[0], 1, pmatch
, 0);
2386 pmatch
[0].rm_so
= 0;
2387 pmatch
[0].rm_eo
= -1;
2389 setvar_i(newvar("RSTART"), pmatch
[0].rm_so
);
2390 setvar_i(newvar("RLENGTH"), pmatch
[0].rm_eo
- pmatch
[0].rm_so
);
2391 setvar_i(res
, pmatch
[0].rm_so
);
2397 awk_sub(an
[0], as
[1], getvar_i(av
[2]), av
[3], res
, TRUE
);
2401 setvar_i(res
, awk_sub(an
[0], as
[1], 0, av
[2], av
[2], FALSE
));
2405 setvar_i(res
, awk_sub(an
[0], as
[1], 1, av
[2], av
[2], FALSE
));
2415 * Evaluate node - the heart of the program. Supplied with subtree
2416 * and place where to store result. returns ptr to result.
2418 #define XC(n) ((n) >> 8)
// Interpreter core: executes node op and stores results through res.
// For each node the flags in opinfo drive the common preparation: OF_RES1 and
// OF_RES2 evaluate the left and right operands into L.v and R.v, OF_STR1 and
// OF_STR2 fetch their string values into L.s and R.s, and OF_NUM1 fetches the
// numeric value of the left operand into L_d. The operation itself is then
// selected by the switch on XC(opinfo & OPCLSMASK). g_lineno is updated from
// op->lineno for every node. An early return yields setvar_s(res, NULL).
2420 static var
*evaluate(node
*op
, var
*res
)
2422 /* This procedure is recursive so we should count every byte */
2423 #define fnargs (G.evaluate__fnargs)
2424 /* seed is initialized to 1 */
2425 #define seed (G.evaluate__seed)
2426 #define sreg (G.evaluate__sreg)
2431 return setvar_s(res
, NULL
);
2433 debug_printf_eval("entered %s()\n", __func__
);
2441 } L
= L
; /* for compiler */
2452 opn
= (opinfo
& OPNMASK
);
2453 g_lineno
= op
->lineno
;
2455 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo
, opn
);
2457 /* execute inevitable things */
2458 if (opinfo
& OF_RES1
)
2459 L
.v
= evaluate(op1
, v1
);
2460 if (opinfo
& OF_RES2
)
2461 R
.v
= evaluate(op
->r
.n
, v1
+1);
2462 if (opinfo
& OF_STR1
) {
2463 L
.s
= getvar_s(L
.v
);
2464 debug_printf_eval("L.s:'%s'\n", L
.s
);
2466 if (opinfo
& OF_STR2
) {
2467 R
.s
= getvar_s(R
.v
);
2468 debug_printf_eval("R.s:'%s'\n", R
.s
);
2470 if (opinfo
& OF_NUM1
) {
2471 L_d
= getvar_i(L
.v
);
2472 debug_printf_eval("L_d:%f\n", L_d
);
2475 debug_printf_eval("switch(0x%x)\n", XC(opinfo
& OPCLSMASK
));
2476 switch (XC(opinfo
& OPCLSMASK
)) {
2478 /* -- iterative node type -- */
2482 if ((op1
->info
& OPCLSMASK
) == OC_COMMA
) {
2483 /* it's range pattern */
2484 if ((opinfo
& OF_CHECKED
) || ptest(op1
->l
.n
)) {
2485 op
->info
|= OF_CHECKED
;
2486 if (ptest(op1
->r
.n
))
2487 op
->info
&= ~OF_CHECKED
;
2493 op
= ptest(op1
) ? op
->a
.n
: op
->r
.n
;
2497 /* just evaluate an expression, also used as unconditional jump */
2501 /* branch, used in if-else and various loops */
2503 op
= istrue(L
.v
) ? op
->a
.n
: op
->r
.n
;
2506 /* initialize for-in loop */
2507 case XC( OC_WALKINIT
):
2508 hashwalk_init(L
.v
, iamarray(R
.v
));
2511 /* get next array item */
2512 case XC( OC_WALKNEXT
):
2513 op
= hashwalk_next(L
.v
) ? op
->a
.n
: op
->r
.n
;
2516 case XC( OC_PRINT
):
2517 case XC( OC_PRINTF
): {
2521 rstream
*rsm
= newfile(R
.s
);
2524 rsm
->F
= popen(R
.s
, "w");
2526 bb_perror_msg_and_die("popen");
2529 rsm
->F
= xfopen(R
.s
, opn
=='w' ? "w" : "a");
2535 if ((opinfo
& OPCLSMASK
) == OC_PRINT
) {
2537 fputs(getvar_s(intvar
[F0
]), F
);
2540 var
*v
= evaluate(nextarg(&op1
), v1
);
2541 if (v
->type
& VF_NUMBER
) {
2542 fmt_num(g_buf
, MAXVARFMT
, getvar_s(intvar
[OFMT
]),
2546 fputs(getvar_s(v
), F
);
2550 fputs(getvar_s(intvar
[OFS
]), F
);
2553 fputs(getvar_s(intvar
[ORS
]), F
);
2555 } else { /* OC_PRINTF */
2556 char *s
= awk_printf(op1
);
2564 case XC( OC_DELETE
): {
2565 uint32_t info
= op1
->info
& OPCLSMASK
;
2568 if (info
== OC_VAR
) {
2570 } else if (info
== OC_FNARG
) {
2571 v
= &fnargs
[op1
->l
.aidx
];
2573 syntax_error(EMSG_NOT_ARRAY
);
2579 s
= getvar_s(evaluate(op1
->r
.n
, v1
));
2580 hash_remove(iamarray(v
), s
);
2582 clear_array(iamarray(v
));
2587 case XC( OC_NEWSOURCE
):
2588 g_progname
= op
->l
.new_progname
;
2591 case XC( OC_RETURN
):
2595 case XC( OC_NEXTFILE
):
2606 /* -- recursive node type -- */
2610 if (L
.v
== intvar
[NF
])
2614 case XC( OC_FNARG
):
2615 L
.v
= &fnargs
[op
->l
.aidx
];
2617 res
= op
->r
.n
? findvar(iamarray(L
.v
), R
.s
) : L
.v
;
2621 setvar_i(res
, hash_search(iamarray(R
.v
), L
.s
) ? 1 : 0);
2624 case XC( OC_REGEXP
):
2626 L
.s
= getvar_s(intvar
[F0
]);
2629 case XC( OC_MATCH
):
2633 regex_t
*re
= as_regex(op1
, &sreg
);
2634 int i
= regexec(re
, L
.s
, 0, NULL
, 0);
2637 setvar_i(res
, (i
== 0) ^ (opn
== '!'));
2642 debug_printf_eval("MOVE\n");
2643 /* if source is a temporary string, just relink it to dest */
2644 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2645 //then L.v ends up being a string, which is wrong
2646 // if (R.v == v1+1 && R.v->string) {
2647 // res = setvar_p(L.v, R.v->string);
2648 // R.v->string = NULL;
2650 res
= copyvar(L
.v
, R
.v
);
2654 case XC( OC_TERNARY
):
2655 if ((op
->r
.n
->info
& OPCLSMASK
) != OC_COLON
)
2656 syntax_error(EMSG_POSSIBLE_ERROR
);
2657 res
= evaluate(istrue(L
.v
) ? op
->r
.n
->l
.n
: op
->r
.n
->r
.n
, res
);
2660 case XC( OC_FUNC
): {
2662 const char *sv_progname
;
2664 if (!op
->r
.f
->body
.first
)
2665 syntax_error(EMSG_UNDEF_FUNC
);
2667 vbeg
= v
= nvalloc(op
->r
.f
->nargs
+ 1);
2669 var
*arg
= evaluate(nextarg(&op1
), v1
);
2671 v
->type
|= VF_CHILD
;
2673 if (++v
- vbeg
>= op
->r
.f
->nargs
)
2679 sv_progname
= g_progname
;
2681 res
= evaluate(op
->r
.f
->body
.first
, res
);
2683 g_progname
= sv_progname
;
2690 case XC( OC_GETLINE
):
2691 case XC( OC_PGETLINE
): {
2698 if ((opinfo
& OPCLSMASK
) == OC_PGETLINE
) {
2699 rsm
->F
= popen(L
.s
, "r");
2700 rsm
->is_pipe
= TRUE
;
2702 rsm
->F
= fopen_for_read(L
.s
); /* not xfopen! */
2707 iF
= next_input_file();
2711 if (!rsm
|| !rsm
->F
) {
2712 setvar_i(intvar
[ERRNO
], errno
);
2720 i
= awk_getline(rsm
, R
.v
);
2721 if (i
> 0 && !op1
) {
2722 incvar(intvar
[FNR
]);
2729 /* simple builtins */
2730 case XC( OC_FBLTIN
): {
2731 double R_d
= R_d
; /* for compiler */
2739 R_d
= (double)rand() / (double)RAND_MAX
;
2743 if (ENABLE_FEATURE_AWK_LIBM
) {
2749 if (ENABLE_FEATURE_AWK_LIBM
) {
2755 if (ENABLE_FEATURE_AWK_LIBM
) {
2761 if (ENABLE_FEATURE_AWK_LIBM
) {
2767 if (ENABLE_FEATURE_AWK_LIBM
) {
2772 syntax_error(EMSG_NO_MATH
);
2777 seed
= op1
? (unsigned)L_d
: (unsigned)time(NULL
);
2787 L
.s
= getvar_s(intvar
[F0
]);
2793 R_d
= (ENABLE_FEATURE_ALLOW_EXEC
&& L
.s
&& *L
.s
)
2794 ? (system(L
.s
) >> 8) : 0;
2800 } else if (L
.s
&& *L
.s
) {
2801 rstream
*rsm
= newfile(L
.s
);
2811 rsm
= (rstream
*)hash_search(fdhash
, L
.s
);
2812 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm
);
2814 debug_printf_eval("OC_FBLTIN F_cl "
2815 "rsm->is_pipe:%d, ->F:%p\n",
2816 rsm
->is_pipe
, rsm
->F
);
2817 /* Can be NULL if open failed. Example:
2818 * getline line <"doesnt_exist";
2819 * close("doesnt_exist"); <--- here rsm->F is NULL
2822 err
= rsm
->is_pipe
? pclose(rsm
->F
) : fclose(rsm
->F
);
2824 hash_remove(fdhash
, L
.s
);
2827 setvar_i(intvar
[ERRNO
], errno
);
2836 case XC( OC_BUILTIN
):
2837 res
= exec_builtin(op
, res
);
2840 case XC( OC_SPRINTF
):
2841 setvar_p(res
, awk_printf(op1
));
2844 case XC( OC_UNARY
): {
2847 Ld
= R_d
= getvar_i(R
.v
);
2874 case XC( OC_FIELD
): {
2875 int i
= (int)getvar_i(R
.v
);
2882 res
= &Fields
[i
- 1];
2887 /* concatenation (" ") and index joining (",") */
2888 case XC( OC_CONCAT
):
2889 case XC( OC_COMMA
): {
2890 const char *sep
= "";
2891 if ((opinfo
& OPCLSMASK
) == OC_COMMA
)
2892 sep
= getvar_s(intvar
[SUBSEP
]);
2893 setvar_p(res
, xasprintf("%s%s%s", L
.s
, sep
, R
.s
));
2898 setvar_i(res
, istrue(L
.v
) ? ptest(op
->r
.n
) : 0);
2902 setvar_i(res
, istrue(L
.v
) ? 1 : ptest(op
->r
.n
));
2905 case XC( OC_BINARY
):
2906 case XC( OC_REPLACE
): {
2907 double R_d
= getvar_i(R
.v
);
2908 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d
, opn
);
2921 syntax_error(EMSG_DIV_BY_ZERO
);
2925 if (ENABLE_FEATURE_AWK_LIBM
)
2926 L_d
= pow(L_d
, R_d
);
2928 syntax_error(EMSG_NO_MATH
);
2932 syntax_error(EMSG_DIV_BY_ZERO
);
2933 L_d
-= (int)(L_d
/ R_d
) * R_d
;
2936 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d
);
2937 res
= setvar_i(((opinfo
& OPCLSMASK
) == OC_BINARY
) ? res
: L
.v
, L_d
);
2941 case XC( OC_COMPARE
): {
2942 int i
= i
; /* for compiler */
2945 if (is_numeric(L
.v
) && is_numeric(R
.v
)) {
2946 Ld
= getvar_i(L
.v
) - getvar_i(R
.v
);
2948 const char *l
= getvar_s(L
.v
);
2949 const char *r
= getvar_s(R
.v
);
2950 Ld
= icase
? strcasecmp(l
, r
) : strcmp(l
, r
);
2952 switch (opn
& 0xfe) {
2963 setvar_i(res
, (i
== 0) ^ (opn
& 1));
2968 syntax_error(EMSG_POSSIBLE_ERROR
);
2970 if ((opinfo
& OPCLSMASK
) <= SHIFT_TIL_THIS
)
2972 if ((opinfo
& OPCLSMASK
) >= RECUR_FROM_THIS
)
2979 debug_printf_eval("returning from %s(): %p\n", __func__
, res
);
2987 /* -------- main & co. -------- */
// Shutdown routine (the use of parameter r is outside this excerpt).
// Visible steps: the END sequence (endseq.first) is evaluated, then every
// slot of fdhash is visited and each stream that is open (rs.F non-NULL) and
// marked is_pipe is closed with pclose(), which the in-code comment describes
// as waiting for children.
2989 static int awk_exit(int r
)
3000 evaluate(endseq
.first
, &tv
);
3003 /* waiting for children */
3004 for (i
= 0; i
< fdhash
->csize
; i
++) {
3005 hi
= fdhash
->items
[i
];
3007 if (hi
->data
.rs
.F
&& hi
->data
.rs
.is_pipe
)
3008 pclose(hi
->data
.rs
.F
);
3016 /* if expr looks like "var=value", perform assignment and return 1,
3017 * otherwise return 0 */
// The guard requires expr to start with an isalnum_() character and to
// contain an equals sign. The string is then duplicated (exprc) and val is
// re-pointed to the same offset inside the copy, so the caller's string is
// not the one being modified. The value is unescaped in place and assigned
// with setvar_u(newvar(exprc), val).
3018 static int is_assignment(const char *expr
)
3022 if (!isalnum_(*expr
) || (val
= strchr(expr
, '=')) == NULL
) {
3026 exprc
= xstrdup(expr
);
3027 val
= exprc
+ (val
- expr
);
3030 unescape_string_in_place(val
);
3031 setvar_u(newvar(exprc
), val
);
3036 /* switch to next input file */
// Advances to the next input file named in ARGV.
// State lives in two globals aliased by the macros below: rsm
// (G.next_input_file__rsm) and files_happen. rsm.pos and rsm.adv are reset
// to 0. The visible guard compares ARGIND + 1 against ARGC. ARGIND is then
// incremented and its string value used as the key into the ARGV array; an
// entry that is non-empty and is not accepted by is_assignment() is opened
// with xfopen_stdin(). files_happen is set to TRUE and FILENAME receives the
// file name.
3037 static rstream
*next_input_file(void)
3039 #define rsm (G.next_input_file__rsm)
3040 #define files_happen (G.next_input_file__files_happen)
3043 const char *fname
, *ind
;
3048 rsm
.pos
= rsm
.adv
= 0;
3051 if (getvar_i(intvar
[ARGIND
])+1 >= getvar_i(intvar
[ARGC
])) {
3058 ind
= getvar_s(incvar(intvar
[ARGIND
]));
3059 fname
= getvar_s(findvar(iamarray(intvar
[ARGV
]), ind
));
3060 if (fname
&& *fname
&& !is_assignment(fname
)) {
3061 F
= xfopen_stdin(fname
);
3066 files_happen
= TRUE
;
3067 setvar_s(intvar
[FILENAME
], fname
);
3074 int awk_main(int argc
, char **argv
) MAIN_EXTERNALLY_VISIBLE
;
3075 int awk_main(int argc
, char **argv
)
3079 llist_t
*list_v
= NULL
;
3080 llist_t
*list_f
= NULL
;
3085 char *vnames
= (char *)vNames
; /* cheat */
3086 char *vvalues
= (char *)vValues
;
3090 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3091 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3092 if (ENABLE_LOCALE_SUPPORT
)
3093 setlocale(LC_NUMERIC
, "C");
3097 /* allocate global buffer */
3098 g_buf
= xmalloc(MAXVARFMT
+ 1);
3100 vhash
= hash_init();
3101 ahash
= hash_init();
3102 fdhash
= hash_init();
3103 fnhash
= hash_init();
3105 /* initialize variables */
3106 for (i
= 0; *vnames
; i
++) {
3107 intvar
[i
] = v
= newvar(nextword(&vnames
));
3108 if (*vvalues
!= '\377')
3109 setvar_s(v
, nextword(&vvalues
));
3113 if (*vnames
== '*') {
3114 v
->type
|= VF_SPECIAL
;
3119 handle_special(intvar
[FS
]);
3120 handle_special(intvar
[RS
]);
3122 newfile("/dev/stdin")->F
= stdin
;
3123 newfile("/dev/stdout")->F
= stdout
;
3124 newfile("/dev/stderr")->F
= stderr
;
3126 /* Huh, people report that sometimes environ is NULL. Oh well. */
3127 if (environ
) for (envp
= environ
; *envp
; envp
++) {
3128 /* environ is writable, thus we don't strdup it needlessly */
3130 char *s1
= strchr(s
, '=');
3133 /* Both findvar and setvar_u take const char*
3134 * as 2nd arg -> environment is not trashed */
3135 setvar_u(findvar(iamarray(intvar
[ENVIRON
]), s
), s1
+ 1);
3139 opt_complementary
= "v::f::"; /* -v and -f can occur multiple times */
3140 opt
= getopt32(argv
, "F:v:f:W:", &opt_F
, &list_v
, &list_f
, NULL
);
3143 if (opt
& 0x1) { /* -F */
3144 unescape_string_in_place(opt_F
);
3145 setvar_s(intvar
[FS
], opt_F
);
3147 while (list_v
) { /* -v */
3148 if (!is_assignment(llist_pop(&list_v
)))
3151 if (list_f
) { /* -f */
3156 g_progname
= llist_pop(&list_f
);
3157 from_file
= xfopen_stdin(g_progname
);
3158 /* one byte is reserved for some trick in next_token */
3159 for (i
= j
= 1; j
> 0; i
+= j
) {
3160 s
= xrealloc(s
, i
+ 4096);
3161 j
= fread(s
+ i
, 1, 4094, from_file
);
3165 parse_program(s
+ 1);
3169 } else { // no -f: take program from 1st parameter
3172 g_progname
= "cmd. line";
3173 parse_program(*argv
++);
3175 if (opt
& 0x8) // -W
3176 bb_error_msg("warning: option -W is ignored");
3178 /* fill in ARGV array */
3179 setvar_i(intvar
[ARGC
], argc
);
3180 setari_u(intvar
[ARGV
], 0, "awk");
3183 setari_u(intvar
[ARGV
], ++i
, *argv
++);
3185 evaluate(beginseq
.first
, &tv
);
3186 if (!mainseq
.first
&& !endseq
.first
)
3187 awk_exit(EXIT_SUCCESS
);
3189 /* input file could already be opened in BEGIN block */
3191 iF
= next_input_file();
3193 /* passing through input files */
3196 setvar_i(intvar
[FNR
], 0);
3198 while ((i
= awk_getline(iF
, intvar
[F0
])) > 0) {
3201 incvar(intvar
[FNR
]);
3202 evaluate(mainseq
.first
, &tv
);
3209 syntax_error(strerror(errno
));
3211 iF
= next_input_file();
3214 awk_exit(EXIT_SUCCESS
);