1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
37 char EMPTY
[] = { '\0' };
39 bool innew
; /* true = infile has not been read by readrec */
42 int recsize
= RECSIZE
;
44 int fieldssize
= RECSIZE
;
46 Cell
**fldtab
; /* pointers to Cells */
47 static size_t len_inputFS
= 0;
48 static char *inputFS
= NULL
; /* FS at time of input, for field splitting */
51 int nfields
= MAXFLD
; /* last allocated slot for $i */
53 bool donefld
; /* true = implies rec broken into fields */
54 bool donerec
; /* true = record is valid (no flds have changed) */
56 int lastfld
= 0; /* last used field */
57 int argno
= 1; /* current input argument number */
58 extern Awkfloat
*ARGC
;
60 static Cell dollar0
= { OCELL
, CFLD
, NULL
, EMPTY
, 0.0, REC
|STR
|DONTFREE
, NULL
, NULL
};
61 static Cell dollar1
= { OCELL
, CFLD
, NULL
, EMPTY
, 0.0, FLD
|STR
|DONTFREE
, NULL
, NULL
};
63 void recinit(unsigned int n
)
65 if ( (record
= (char *) malloc(n
)) == NULL
66 || (fields
= (char *) malloc(n
+1)) == NULL
67 || (fldtab
= (Cell
**) calloc(nfields
+2, sizeof(*fldtab
))) == NULL
68 || (fldtab
[0] = (Cell
*) malloc(sizeof(**fldtab
))) == NULL
)
69 FATAL("out of space for $0 and fields");
72 fldtab
[0]->sval
= record
;
73 fldtab
[0]->nval
= tostring("0");
74 makefields(1, nfields
);
77 void makefields(int n1
, int n2
) /* create $n1..$n2 inclusive */
82 for (i
= n1
; i
<= n2
; i
++) {
83 fldtab
[i
] = (Cell
*) malloc(sizeof(**fldtab
));
84 if (fldtab
[i
] == NULL
)
85 FATAL("out of space in makefields %d", i
);
87 snprintf(temp
, sizeof(temp
), "%d", i
);
88 fldtab
[i
]->nval
= tostring(temp
);
97 for (i
= 1; i
< *ARGC
; i
++) {
98 p
= getargv(i
); /* find 1st real filename */
99 if (p
== NULL
|| *p
== '\0') { /* deleted or zapped */
104 setsval(lookup("FILENAME", symtab
), p
);
107 setclvar(p
); /* a commandline assignment before filename */
110 infile
= stdin
; /* no filenames, so use stdin */
115 * POSIX specifies that fields are supposed to be evaluated as if they were
116 * split using the value of FS at the time that the record's value ($0) was
119 * Since field-splitting is done lazily, we save the current value of FS
120 * whenever a new record is read in (implicitly or via getline), or when
121 * a new value is assigned to $0.
126 if ((len
= strlen(getsval(fsloc
))) < len_inputFS
) {
127 strcpy(inputFS
, *FS
); /* for subsequent field splitting */
131 len_inputFS
= len
+ 1;
132 inputFS
= (char *) realloc(inputFS
, len_inputFS
);
134 FATAL("field separator %.10s... is too long", *FS
);
135 memcpy(inputFS
, *FS
, len_inputFS
);
138 static bool firsttime
= true;
140 int getrec(char **pbuf
, int *pbufsize
, bool isrecord
) /* get next input record */
141 { /* note: cares whether buf == record */
145 int bufsize
= *pbufsize
, savebufsize
= bufsize
;
151 DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
152 *RS
, *FS
, *ARGC
, *FILENAME
);
160 while (argno
< *ARGC
|| infile
== stdin
) {
161 DPRINTF("argno=%d, file=|%s|\n", argno
, file
);
162 if (infile
== NULL
) { /* have to open a new file */
163 file
= getargv(argno
);
164 if (file
== NULL
|| *file
== '\0') { /* deleted or zapped */
168 if (isclvar(file
)) { /* a var=value arg */
174 DPRINTF("opening file %s\n", file
);
175 if (*file
== '-' && *(file
+1) == '\0')
177 else if ((infile
= fopen(file
, "r")) == NULL
)
178 FATAL("can't open file %s", file
);
180 setfval(fnrloc
, 0.0);
182 c
= readrec(&buf
, &bufsize
, infile
, innew
);
185 if (c
!= 0 || buf
[0] != '\0') { /* normal record */
189 if (freeable(fldtab
[0]))
190 xfree(fldtab
[0]->sval
);
191 fldtab
[0]->sval
= buf
; /* buf == record */
192 fldtab
[0]->tval
= REC
| STR
| DONTFREE
;
193 if (is_number(fldtab
[0]->sval
, & result
)) {
194 fldtab
[0]->fval
= result
;
195 fldtab
[0]->tval
|= NUM
;
198 setfval(nrloc
, nrloc
->fval
+1);
199 setfval(fnrloc
, fnrloc
->fval
+1);
204 /* EOF arrived on this file; set up next */
212 *pbufsize
= savebufsize
;
213 return 0; /* true end of file */
218 if (infile
!= NULL
&& infile
!= stdin
)
224 int readrec(char **pbuf
, int *pbufsize
, FILE *inf
, bool newflag
) /* read one record into buf */
227 char *rr
, *buf
= *pbuf
;
228 int bufsize
= *pbufsize
;
229 char *rs
= getsval(rsloc
);
234 fa
*pfa
= makedfa(rs
, 1);
236 found
= fnematch(pfa
, inf
, &buf
, &bufsize
, recsize
);
238 int tempstat
= pfa
->initstat
;
240 found
= fnematch(pfa
, inf
, &buf
, &bufsize
, recsize
);
241 pfa
->initstat
= tempstat
;
244 setptr(patbeg
, '\0');
245 isrec
= (found
== 0 && *buf
== '\0') ? false : true;
247 if ((sep
= *rs
) == 0) {
249 while ((c
=getc(inf
)) == '\n' && c
!= EOF
) /* skip leading \n's */
255 for (; (c
=getc(inf
)) != sep
&& c
!= EOF
; ) {
256 if (rr
-buf
+1 > bufsize
)
257 if (!adjbuf(&buf
, &bufsize
, 1+rr
-buf
,
258 recsize
, &rr
, "readrec 1"))
259 FATAL("input record `%.30s...' too long", buf
);
262 if (*rs
== sep
|| c
== EOF
)
264 if ((c
= getc(inf
)) == '\n' || c
== EOF
) /* 2 in a row */
266 if (!adjbuf(&buf
, &bufsize
, 2+rr
-buf
, recsize
, &rr
,
268 FATAL("input record `%.30s...' too long", buf
);
272 if (!adjbuf(&buf
, &bufsize
, 1+rr
-buf
, recsize
, &rr
, "readrec 3"))
273 FATAL("input record `%.30s...' too long", buf
);
275 isrec
= (c
== EOF
&& rr
== buf
) ? false : true;
279 DPRINTF("readrec saw <%s>, returns %d\n", buf
, isrec
);
283 char *getargv(int n
) /* get ARGV[n] */
287 extern Array
*ARGVtab
;
289 snprintf(temp
, sizeof(temp
), "%d", n
);
290 if (lookup(temp
, ARGVtab
) == NULL
)
292 x
= setsymtab(temp
, "", 0.0, STR
, ARGVtab
);
294 DPRINTF("getargv(%d) returns |%s|\n", n
, s
);
298 void setclvar(char *s
) /* set var=value from s */
304 for (p
=s
; *p
!= '='; p
++)
308 p
= qstring(p
, '\0');
309 q
= setsymtab(s
, p
, 0.0, STR
, symtab
);
311 if (is_number(q
->sval
, & result
)) {
315 DPRINTF("command line set %s to |%s|\n", s
, p
);
321 void fldbld(void) /* create fields from current record */
323 /* this relies on having fields[] the same length as $0 */
324 /* the fields are all stored in this one array with \0's */
325 /* possibly with a final trailing \0 not associated with any field */
332 if (!isstr(fldtab
[0]))
336 if (n
> fieldssize
) {
338 if ((fields
= (char *) malloc(n
+2)) == NULL
) /* possibly 2 final \0s */
339 FATAL("out of space for fields in fldbld %d", n
);
343 i
= 0; /* number of fields accumulated here */
344 if (inputFS
== NULL
) /* make sure we have a copy of FS */
346 if (strlen(inputFS
) > 1) { /* it's a regular expression */
347 i
= refldbld(r
, inputFS
);
348 } else if ((sep
= *inputFS
) == ' ') { /* default whitespace */
350 while (*r
== ' ' || *r
== '\t' || *r
== '\n')
357 if (freeable(fldtab
[i
]))
358 xfree(fldtab
[i
]->sval
);
359 fldtab
[i
]->sval
= fr
;
360 fldtab
[i
]->tval
= FLD
| STR
| DONTFREE
;
363 while (*r
!= ' ' && *r
!= '\t' && *r
!= '\n' && *r
!= '\0');
367 } else if ((sep
= *inputFS
) == 0) { /* new: FS="" => 1 char/field */
368 for (i
= 0; *r
!= '\0'; r
+= n
) {
369 char buf
[MB_LEN_MAX
+ 1];
374 if (freeable(fldtab
[i
]))
375 xfree(fldtab
[i
]->sval
);
376 n
= mblen(r
, MB_LEN_MAX
);
381 fldtab
[i
]->sval
= tostring(buf
);
382 fldtab
[i
]->tval
= FLD
| STR
;
385 } else if (*r
!= 0) { /* if 0, it's a null field */
386 /* subtle case: if length(FS) == 1 && length(RS) > 0
387 * \n is NOT a field separator (cf awk book 61,84).
388 * this variable is tested in the inner while loop.
390 int rtest
= '\n'; /* normal case */
397 if (freeable(fldtab
[i
]))
398 xfree(fldtab
[i
]->sval
);
399 fldtab
[i
]->sval
= fr
;
400 fldtab
[i
]->tval
= FLD
| STR
| DONTFREE
;
401 while (*r
!= sep
&& *r
!= rtest
&& *r
!= '\0') /* \n is always a separator */
410 FATAL("record `%.30s...' has too many fields; can't happen", r
);
411 cleanfld(i
+1, lastfld
); /* clean out junk from previous record */
414 for (j
= 1; j
<= lastfld
; j
++) {
418 if(is_number(p
->sval
, & result
)) {
423 setfval(nfloc
, (Awkfloat
) lastfld
);
424 donerec
= true; /* restore */
426 for (j
= 0; j
<= lastfld
; j
++) {
428 printf("field %d (%s): |%s|\n", j
, p
->nval
, p
->sval
);
433 void cleanfld(int n1
, int n2
) /* clean out fields n1 .. n2 inclusive */
434 { /* nvals remain intact */
438 for (i
= n1
; i
<= n2
; i
++) {
443 p
->tval
= FLD
| STR
| DONTFREE
;
447 void newfld(int n
) /* add field n after end of existing lastfld */
451 cleanfld(lastfld
+1, n
);
453 setfval(nfloc
, (Awkfloat
) n
);
456 void setlastfld(int n
) /* set lastfld cleaning fldtab cells if necessary */
459 FATAL("cannot set NF to a negative value");
464 cleanfld(lastfld
+1, n
);
466 cleanfld(n
+1, lastfld
);
471 Cell
*fieldadr(int n
) /* get nth field */
474 FATAL("trying to access out of range field %d", n
);
475 if (n
> nfields
) /* fields after NF are empty */
476 growfldtab(n
); /* but does not increase NF */
480 void growfldtab(int n
) /* make new fields up to at least $n */
482 int nf
= 2 * nfields
;
487 s
= (nf
+1) * (sizeof (struct Cell
*)); /* freebsd: how much do we need? */
488 if (s
/ sizeof(struct Cell
*) - 1 == (size_t)nf
) /* didn't overflow */
489 fldtab
= (Cell
**) realloc(fldtab
, s
);
490 else /* overflow sizeof int */
491 xfree(fldtab
); /* make it null */
493 FATAL("out of space creating %d fields", nf
);
494 makefields(nfields
+1, nf
);
498 int refldbld(const char *rec
, const char *fs
) /* build fields from reg expr in FS */
500 /* this relies on having fields[] the same length as $0 */
501 /* the fields are all stored in this one array with \0's */
507 if (n
> fieldssize
) {
509 if ((fields
= (char *) malloc(n
+1)) == NULL
)
510 FATAL("out of space for fields in refldbld %d", n
);
517 pfa
= makedfa(fs
, 1);
518 DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec
, fs
);
519 tempstat
= pfa
->initstat
;
523 if (freeable(fldtab
[i
]))
524 xfree(fldtab
[i
]->sval
);
525 fldtab
[i
]->tval
= FLD
| STR
| DONTFREE
;
526 fldtab
[i
]->sval
= fr
;
527 DPRINTF("refldbld: i=%d\n", i
);
528 if (nematch(pfa
, rec
)) {
529 pfa
->initstat
= 2; /* horrible coupling to b.c */
530 DPRINTF("match %s (%d chars)\n", patbeg
, patlen
);
531 strncpy(fr
, rec
, patbeg
-rec
);
532 fr
+= patbeg
- rec
+ 1;
534 rec
= patbeg
+ patlen
;
536 DPRINTF("no match %s\n", rec
);
538 pfa
->initstat
= tempstat
;
545 void recbld(void) /* create $0 from $1..$NF if necessary */
549 char *sep
= getsval(ofsloc
);
554 for (i
= 1; i
<= *NF
; i
++) {
555 p
= getsval(fldtab
[i
]);
556 if (!adjbuf(&record
, &recsize
, 1+strlen(p
)+r
-record
, recsize
, &r
, "recbld 1"))
557 FATAL("created $0 `%.30s...' too long", record
);
558 while ((*r
= *p
++) != 0)
561 if (!adjbuf(&record
, &recsize
, 2+strlen(sep
)+r
-record
, recsize
, &r
, "recbld 2"))
562 FATAL("created $0 `%.30s...' too long", record
);
563 for (p
= sep
; (*r
= *p
++) != 0; )
567 if (!adjbuf(&record
, &recsize
, 2+r
-record
, recsize
, &r
, "recbld 3"))
568 FATAL("built giant record `%.30s...'", record
);
570 DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS
, (void*)fldtab
[0]);
572 if (freeable(fldtab
[0]))
573 xfree(fldtab
[0]->sval
);
574 fldtab
[0]->tval
= REC
| STR
| DONTFREE
;
575 fldtab
[0]->sval
= record
;
577 DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS
, (void*)fldtab
[0]);
578 DPRINTF("recbld = |%s|\n", record
);
584 void yyerror(const char *s
)
589 void SYNTAX(const char *fmt
, ...)
591 extern char *cmdname
, *curfname
;
592 static int been_here
= 0;
597 fprintf(stderr
, "%s: ", cmdname
);
599 vfprintf(stderr
, fmt
, varg
);
601 fprintf(stderr
, " at source line %d", lineno
);
602 if (curfname
!= NULL
)
603 fprintf(stderr
, " in function %s", curfname
);
604 if (compile_time
== COMPILING
&& cursource() != NULL
)
605 fprintf(stderr
, " source file %s", cursource());
606 fprintf(stderr
, "\n");
611 extern int bracecnt
, brackcnt
, parencnt
;
613 void bracecheck(void)
616 static int beenhere
= 0;
620 while ((c
= input()) != EOF
&& c
!= '\0')
622 bcheck2(bracecnt
, '{', '}');
623 bcheck2(brackcnt
, '[', ']');
624 bcheck2(parencnt
, '(', ')');
627 void bcheck2(int n
, int c1
, int c2
)
630 fprintf(stderr
, "\tmissing %c\n", c2
);
632 fprintf(stderr
, "\t%d missing %c's\n", n
, c2
);
634 fprintf(stderr
, "\textra %c\n", c2
);
636 fprintf(stderr
, "\t%d extra %c's\n", -n
, c2
);
639 void FATAL(const char *fmt
, ...)
641 extern char *cmdname
;
645 fprintf(stderr
, "%s: ", cmdname
);
647 vfprintf(stderr
, fmt
, varg
);
650 if (dbg
> 1) /* core dump if serious debugging on */
655 void WARNING(const char *fmt
, ...)
657 extern char *cmdname
;
661 fprintf(stderr
, "%s: ", cmdname
);
663 vfprintf(stderr
, fmt
, varg
);
670 extern Node
*curnode
;
672 fprintf(stderr
, "\n");
673 if (compile_time
!= ERROR_PRINTING
) {
675 fprintf(stderr
, " input record number %d", (int) (*FNR
));
676 if (strcmp(*FILENAME
, "-") != 0)
677 fprintf(stderr
, ", file %s", *FILENAME
);
678 fprintf(stderr
, "\n");
681 fprintf(stderr
, " source line number %d", curnode
->lineno
);
683 fprintf(stderr
, " source line number %d", lineno
);
684 if (compile_time
== COMPILING
&& cursource() != NULL
)
685 fprintf(stderr
, " source file %s", cursource());
686 fprintf(stderr
, "\n");
691 void eprint(void) /* try to print context around error */
695 static int been_here
= 0;
696 extern char ebuf
[], *ep
;
698 if (compile_time
!= COMPILING
|| been_here
++ > 0 || ebuf
== ep
)
703 if (p
> ebuf
&& *p
== '\n')
705 for ( ; p
> ebuf
&& *p
!= '\n' && *p
!= '\0'; p
--)
709 fprintf(stderr
, " context is\n\t");
710 for (q
=ep
-1; q
>=p
&& *q
!=' ' && *q
!='\t' && *q
!='\n'; q
--)
715 fprintf(stderr
, " >>> ");
719 fprintf(stderr
, " <<< ");
721 while ((c
= input()) != '\n' && c
!= '\0' && c
!= EOF
) {
732 case '{': bracecnt
++; break;
733 case '}': bracecnt
--; break;
734 case '[': brackcnt
++; break;
735 case ']': brackcnt
--; break;
736 case '(': parencnt
++; break;
737 case ')': parencnt
--; break;
741 double errcheck(double x
, const char *s
)
746 WARNING("%s argument out of domain", s
);
748 } else if (errno
== ERANGE
) {
750 WARNING("%s result out of range", s
);
756 int isclvar(const char *s
) /* is s of form var=something ? */
760 if (!isalpha((uschar
) *s
) && *s
!= '_')
763 if (!(isalnum((uschar
) *s
) || *s
== '_'))
765 return *s
== '=' && s
> os
;
768 /* strtod is supposed to be a proper test of what's a valid number */
769 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
770 /* wrong: violates 4.10.1.4 of ansi C standard */
772 /* Well, not quite. As of C99, hex floating point is allowed, so this is
773 * a bit of a mess. We work around the mess by checking for a hexadecimal
774 * value and disallowing it. Similarly, we now follow gawk and allow only
775 * +nan, -nan, +inf, and -inf for NaN and infinity values.
779 * This routine now has a more complicated interface, the main point
780 * being to avoid the double conversion of a string to double, and
781 * also to convey out, if requested, the information that the numeric
782 * value was a leading string or is all of the string. The latter bit
783 * is used in getfval().
786 bool is_valid_number(const char *s
, bool trailing_stuff_ok
,
787 bool *no_trailing
, double *result
)
796 *no_trailing
= false;
801 // no hex floating point, sorry
802 if (s
[0] == '0' && tolower(s
[1]) == 'x')
805 // allow +nan, -nan, +inf, -inf, any other letter, no
806 if (s
[0] == '+' || s
[0] == '-') {
807 is_nan
= (strncasecmp(s
+1, "nan", 3) == 0);
808 is_inf
= (strncasecmp(s
+1, "inf", 3) == 0);
809 if ((is_nan
|| is_inf
)
810 && (isspace(s
[4]) || s
[4] == '\0'))
812 else if (! isdigit(s
[1]) && s
[1] != '.')
815 else if (! isdigit(s
[0]) && s
[0] != '.')
821 if (ep
== s
|| errno
== ERANGE
)
824 if (isnan(r
) && s
[0] == '-' && signbit(r
) == 0)
831 * check for trailing stuff
836 if (no_trailing
!= NULL
)
837 *no_trailing
= (*ep
== '\0');
839 // return true if found the end, or trailing stuff is allowed
840 retval
= *ep
== '\0' || trailing_stuff_ok
;