4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved.
31 * awk -- mainline, yylex, etc.
33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
43 static char *progfiles
[NPFILE
]; /* Programmes files for yylex */
44 static char **progfilep
= &progfiles
[0]; /* Pointer to last file */
45 static wchar_t *progptr
; /* In-memory programme */
46 static int proglen
; /* Length of progptr */
47 static wchar_t context
[NCONTEXT
]; /* Circular buffer of context */
48 static wchar_t *conptr
= &context
[0]; /* context ptr */
49 static FILE *progfp
; /* Stdio stream for programme */
50 static char *filename
;
55 #define AWK_EXEC_MAGIC "<MKS AWKC>"
56 #define LEN_EXEC_MAGIC 10
58 static char unbal
[] = "unbalanced E char";
60 static void awkarginit(int c
, char **av
);
61 static int lexid(wint_t c
);
62 static int lexnumber(wint_t c
);
63 static int lexstring(wint_t endc
);
64 static int lexregexp(wint_t endc
);
66 static void awkvarinit(void);
67 static wint_t lexgetc(void);
68 static void lexungetc(wint_t c
);
69 static size_t lexescape(wint_t endc
, int regx
, int cmd_line_operand
);
70 static void awkierr(int perr
, char *fmt
, va_list ap
) __NORETURN
;
71 static int usage(void);
72 void strescape(wchar_t *str
);
73 static const char *toprint(wint_t);
75 static wchar_t *mbconvert(char *str
);
77 extern int isclvar(wchar_t *arg
);
83 main(int argc
, char *argv
[])
91 linebuf
= emalloc(NLINE
* sizeof (wchar_t));
94 * At this point only messaging should be internationalized.
95 * Numbers are still scanned as in the POSIX locale.
97 (void) setlocale(LC_ALL
, "");
98 (void) setlocale(LC_NUMERIC
, "C");
99 #if !defined(TEXT_DOMAIN)
100 #define TEXT_DOMAIN "SYS_TEST"
102 (void) textdomain(TEXT_DOMAIN
);
106 while (argc
> 1 && *argv
[1] == '-') {
107 void *save_ptr
= NULL
;
108 ap
= mbstowcsdup(&argv
[1][1]);
115 save_ptr
= (void *) ap
;
118 if (*ap
== '-' && ap
[1] == '\0')
120 for (; *ap
!= '\0'; ++ap
) {
130 (void) fprintf(stderr
,
131 gettext("Missing script file\n"));
134 *progfilep
++ = argv
[1];
142 (void) fprintf(stderr
,
143 gettext("Missing field separator\n"));
146 ap
= mbstowcsdup(argv
[1]);
152 strassign(varFS
, linebuf
, FALLOC
,
161 (void) fprintf(stderr
,
162 gettext("Missing variable assignment\n"));
165 arg
= mbconvert(argv
[1]);
167 * Ensure the variable expression
168 * is valid (correct form).
170 if (((vp
= wcschr(arg
, '=')) != NULL
) &&
174 strassign(vlook(arg
), linebuf
,
179 (void) fprintf(stderr
, gettext(
180 "Invalid form for variable "
181 "assignment: %S\n"), arg
);
190 (void) fprintf(stderr
,
191 gettext("Unknown option \"-%S\"\n"), ap
);
199 if (progfilep
== &progfiles
[0]) {
202 filename
= "[command line]"; /* BUG: NEEDS TRANSLATION */
203 progptr
= mbstowcsdup(argv
[1]);
204 proglen
= wcslen(progptr
);
211 awkarginit(argc
, argv
);
218 * Ok, done parsing, so now activate the rest of the nls stuff, set
219 * the radix character.
221 (void) setlocale(LC_ALL
, "");
222 radixpoint
= *localeconv()->decimal_point
;
229 * Do initial setup of buffers, etc.
230 * This must be called before most processing
231 * and especially before lexical analysis.
232 * Variables initialised here will be overruled by command
233 * line parameter initialisation.
240 (void) setvbuf(stderr
, NULL
, _IONBF
, 0);
242 if ((NIOSTREAM
= sysconf(_SC_OPEN_MAX
) - 4) <= 0) {
243 (void) fprintf(stderr
,
244 gettext("not enough available file descriptors"));
247 ofiles
= (OFILE
*)emalloc(sizeof (OFILE
)*NIOSTREAM
);
248 #ifdef A_ZERO_POINTERS
249 (void) memset((wchar_t *)ofiles
, 0, sizeof (OFILE
) * NIOSTREAM
);
252 /* initialize file descriptor table */
254 for (fp
= ofiles
; fp
< &ofiles
[NIOSTREAM
]; fp
+= 1) {
257 fp
->f_name
= (char *)0;
261 constant
= intnode((INT
)0);
263 const0
= intnode((INT
)0);
264 const1
= intnode((INT
)1);
265 constundef
= emptynode(CONSTANT
, 0);
266 constundef
->n_flags
= FSTRING
|FVINT
;
267 constundef
->n_string
= _null
;
268 constundef
->n_strlen
= 0;
269 inc_oper
= emptynode(ADD
, 0);
270 inc_oper
->n_right
= const1
;
271 asn_oper
= emptynode(ADD
, 0);
272 field0
= node(FIELD
, const0
, NNULL
);
277 for (rp
= &resfuncs
[0]; rp
->rf_name
!= (LOCCHARP
)NULL
; ++rp
) {
278 np
= finstall(rp
->rf_name
, rp
->rf_func
, rp
->rf_type
);
284 for (rp
= &reserved
[0]; rp
->r_name
!= (LOCCHARP
)NULL
; ++rp
) {
285 switch (rp
->r_type
) {
289 np
= vlook(rp
->r_name
);
290 if (rp
->r_type
== SVAR
)
291 np
->n_flags
|= FSPECIAL
;
292 if (rp
->r_svalue
!= NULL
)
293 strassign(np
, rp
->r_svalue
, FSTATIC
,
294 (size_t)rp
->r_ivalue
);
296 constant
->n_int
= rp
->r_ivalue
;
297 (void) assign(np
, constant
);
303 kinstall(rp
->r_name
, (int)rp
->r_ivalue
);
310 varFNR
= vlook(s_FNR
);
312 varOFMT
= vlook(s_OFMT
);
313 varCONVFMT
= vlook(s_CONVFMT
);
314 varOFS
= vlook(s_OFS
);
315 varORS
= vlook(s_ORS
);
318 varARGC
= vlook(s_ARGC
);
319 varSUBSEP
= vlook(s_SUBSEP
);
320 varENVIRON
= vlook(s_ENVIRON
);
321 varFILENAME
= vlook(s_FILENAME
);
322 varSYMTAB
= vlook(s_SYMTAB
);
323 incNR
= node(ASG
, varNR
, node(ADD
, varNR
, const1
));
324 incFNR
= node(ASG
, varFNR
, node(ADD
, varFNR
, const1
));
325 clrFNR
= node(ASG
, varFNR
, const0
);
329 * Initialise awk ARGC, ARGV variables.
332 awkarginit(int ac
, char **av
)
337 ARGVsubi
= node(INDEX
, vlook(s_ARGV
), constant
);
339 constant
->n_int
= ac
;
340 (void) assign(varARGC
, constant
);
341 for (i
= 0; i
< ac
; ++i
) {
342 cp
= mbstowcsdup(av
[i
]);
344 strassign(exprreduce(ARGVsubi
), cp
,
345 FSTATIC
|FSENSE
, wcslen(cp
));
351 * Clean up when done parsing a function.
352 * All formal parameters, because of a deal (funparm) in
353 * yylex, get put into the symbol table in front of any
354 * global variable of the same name. When the entire
355 * function is parsed, remove these formal dummy nodes
356 * from the symbol table but retain the nodes because
357 * the generated tree points at them.
364 while ((formal
= getlist(&np
)) != NNULL
)
365 delsymtab(formal
, 0);
369 * The lexical analyzer.
376 static int savetoken
= 0;
378 static int isfuncdef
;
379 static int nbrace
, nparen
, nbracket
;
380 static struct ctosymstruct
{
383 { '|', BAR
}, { '^', CARAT
},
384 { '~', TILDE
}, { '<', LANGLE
},
385 { '>', RANGLE
}, { '+', PLUSC
},
386 { '-', HYPHEN
}, { '*', STAR
},
387 { '/', SLASH
}, { '%', PERCENT
},
388 { '!', EXCLAMATION
}, { '$', DOLLAR
},
389 { '[', LSQUARE
}, { ']', RSQUARE
},
390 { '(', LPAREN
}, { ')', RPAREN
},
391 { ';', SEMI
}, { '{', LBRACE
},
392 { '}', RBRACE
}, { 0, 0 }
398 } else if (redelim
!= '\0') {
403 c
= lexlast
= lexregexp(c
);
405 } else while ((c
= lexgetc()) != WEOF
) {
406 if (iswalpha(c
) || c
== '_') {
408 } else if (iswdigit(c
) || c
== '.') {
410 } else if (isWblank(c
)) {
423 while ((c
= lexgetc()) != '\n' && c
!= WEOF
)
429 if ((c1
= lexgetc()) == '+')
438 if ((c1
= lexgetc()) == '-')
447 if ((c1
= lexgetc()) == '=')
449 else if (c1
== '*') {
450 if ((c1
= lexgetc()) == '=')
461 if ((c1
= lexgetc()) == '=') {
470 if ((c1
= lexgetc()) == '=' &&
471 lexlast
!= RE
&& lexlast
!= NRE
&&
472 lexlast
!= ';' && lexlast
!= '\n' &&
473 lexlast
!= ',' && lexlast
!= '(')
480 if ((c1
= lexgetc()) == '=')
487 if ((c1
= lexgetc()) == '&')
494 if ((c1
= lexgetc()) == '|')
504 if ((c1
= lexgetc()) == '=')
510 if (nparen
== 0 && inprint
)
516 if ((c1
= lexgetc()) == '=')
523 if ((c1
= lexgetc()) == '=')
532 if ((c1
= lexgetc()) == '=')
543 if (catterm
|| inprint
) {
569 if (lexlast
!= ';') {
597 if (lexlast
!= ';') {
608 if (--nbracket
< 0) {
619 if ((c1
= lexgetc()) == '\n')
639 gettext("invalid character \"%s\""),
656 } else if (!isfuncdef
) {
657 if ((c1
= lexgetc()) != '(')
679 if (!catterm
|| lexlast
!= CONSTANT
|| wasfield
)
712 * Map character constants to symbolic names.
714 for (i
= 0; ctosym
[i
].c
!= 0; i
++)
715 if (c
== ctosym
[i
].c
) {
722 (void) printf("%d\n", (int)c
);
728 * Read a number for the lexical analyzer.
729 * Input is the first character of the number.
730 * Return value is the lexical type.
747 } else if (c
== 'e' || c
== 'E') {
748 if ((c
= lexgetc()) != '-' && c
!= '+') {
758 } while ((c
= lexgetc()) != WEOF
);
760 if (dotfound
&& cp
== linebuf
+1)
764 if (!dotfound
&& !efound
&&
765 ((number
= wcstol(linebuf
, (wchar_t **)0, 10)), errno
!= ERANGE
))
766 yylval
.node
= intnode(number
);
768 yylval
.node
= realnode((REAL
)wcstod(linebuf
, (wchar_t **)0));
773 * Read an identifier.
774 * Input is first character of identifier.
788 } while (iswalpha(c
) || iswdigit(c
) || c
== '_');
791 yylval
.node
= np
= vlook(linebuf
);
793 switch (np
->n_type
) {
795 switch (np
->n_keywtype
) {
801 return ((int)np
->n_keywtype
);
808 * If reading the argument list, create a dummy node
809 * for the duration of that function. These variables
810 * can be removed from the symbol table at function end
811 * but they must still exist because the execution tree
816 np
= emptynode(PARM
, i
= (cp
-linebuf
));
817 np
->n_flags
= FSTRING
;
818 np
->n_string
= _null
;
820 (void) memcpy(np
->n_name
, linebuf
,
821 (i
+1) * sizeof (wchar_t));
824 } else if (np
== varNF
|| (np
== varFS
&&
825 (!doing_begin
|| begin_getline
))) {
827 * If the user program references NF or sets
828 * FS either outside of a begin block or
829 * in a begin block after a getline then the
830 * input line will be split immediately upon read
831 * rather than when a field is first referenced.
834 } else if (np
== varENVIRON
)
842 * It is ok to redefine functions as parameters
844 if (funparm
) goto do_funparm
;
849 * When a getline is encountered, clear the 'doing_begin' flag.
850 * This will force the 'needsplit' flag to be set, even inside
851 * a begin block, if FS is altered. (See VAR case above)
862 * Read a string for the lexical analyzer.
863 * `endc' terminates the string.
866 lexstring(wint_t endc
)
868 size_t length
= lexescape(endc
, 0, 0);
870 yylval
.node
= stringnode(linebuf
, FALLOC
, length
);
875 * Read a regular expression.
878 lexregexp(wint_t endc
)
880 (void) lexescape(endc
, 1, 0);
881 yylval
.node
= renode(linebuf
);
886 * Process a string, converting the escape characters as required by
887 * 1003.2. The processed string ends up in the global linebuf[]. This
888 * routine also changes the value of 'progfd' - the program file
889 * descriptor, so it should be used with some care. It is presently used to
890 * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
893 strescape(wchar_t *str
)
896 proglen
= wcslen(str
) + 1; /* Include \0 */
897 (void) lexescape('\0', 0, 1);
902 * Read a string or regular expression, terminated by ``endc'',
903 * for lexical analyzer, processing escape sequences.
904 * Return string length.
907 lexescape(wint_t endc
, int regx
, int cmd_line_operand
)
909 static char nlre
[256];
910 static char nlstr
[256];
911 static char eofre
[256];
912 static char eofstr
[256];
918 if (first_time
== 1) {
919 (void) strcpy(nlre
, gettext("Newline in regular expression\n"));
920 (void) strcpy(nlstr
, gettext("Newline in string\n"));
921 (void) strcpy(eofre
, gettext("EOF in regular expression\n"));
922 (void) strcpy(eofstr
, gettext("EOF in string\n"));
927 while ((c
= lexgetc()) != endc
) {
929 awkerr(regx
? nlre
: nlstr
);
931 switch (c
= lexgetc(), c
) {
971 while (iswxdigit(c
= lexgetc())) {
974 else if (iswupper(c
))
994 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
995 * requires processing of the octal escapes both in strings and
996 * regular expressions. The following code is disabled instead of
997 * removed as back-referencing may be reintroduced in a future draft
1001 * For regular expressions, we disallow
1002 * \ooo to mean octal character, in favour
1003 * of back referencing.
1014 if ((c
= lexgetc()) > '7' || c
< '0')
1019 * an octal escape sequence must have at least
1020 * 2 digits after the backslash, otherwise
1021 * it gets passed straight thru for possible
1022 * use in backreferencing.
1035 if (c
!= endc
|| cmd_line_operand
) {
1043 awkerr(regx
? eofre
: eofstr
);
1047 return (cp
- linebuf
);
1051 * Build a regular expression NODE.
1052 * Argument is the string holding the expression.
1060 np
= emptynode(RE
, 0);
1061 np
->n_left
= np
->n_right
= NNULL
;
1062 if ((n
= REGWCOMP(&np
->n_regexp
, s
)) != REG_OK
) {
1066 m
= REGWERROR(n
, np
->n_regexp
, NULL
, 0);
1067 p
= (char *)emalloc(m
);
1068 REGWERROR(n
, np
->n_regexp
, p
, m
);
1069 awkerr("/%S/: %s", s
, p
);
1074 * Get a character for the lexical analyser routine.
1080 static char **files
= &progfiles
[0];
1082 if (progfp
!= FNULL
&& (c
= fgetwc(progfp
)) != WEOF
)
1085 if (progptr
!= NULL
) {
1091 if (progfp
!= FNULL
) {
1092 if (progfp
!= stdin
)
1093 (void) fclose(progfp
);
1098 if (files
< progfilep
) {
1099 filename
= *files
++;
1101 if (filename
[0] == '-' && filename
[1] == '\0')
1103 else if ((progfp
= fopen(filename
, r
))
1105 (void) fprintf(stderr
,
1106 gettext("script file \"%s\""), filename
);
1115 if (conptr
>= &context
[NCONTEXT
])
1116 conptr
= &context
[0];
1123 * Return a character for lexical analyser.
1124 * Only one returned character is legitimate (not enforced).
1132 if (conptr
== &context
[0])
1133 conptr
= &context
[NCONTEXT
];
1136 if (progfp
!= FNULL
) {
1137 (void) ungetwc(c
, progfp
);
1147 * Syntax errors during parsing.
1150 yyerror(char *s
, ...)
1152 if (lexlast
== FUNC
|| lexlast
== GETLINE
|| lexlast
== KEYWORD
)
1153 if (lexlast
== KEYWORD
)
1154 awkerr(gettext("inadmissible use of reserved keyword"));
1156 awkerr(gettext("attempt to redefine builtin function"));
1161 * Error routine for all awk errors.
1165 awkerr(char *fmt
, ...)
1169 va_start(args
, fmt
);
1170 awkierr(0, fmt
, args
);
1175 * Error routine like "awkerr" except that it prints out
1176 * a message that includes an errno-specific indication.
1180 awkperr(char *fmt
, ...)
1184 va_start(args
, fmt
);
1185 awkierr(1, fmt
, args
);
1190 * Common internal routine for awkerr, awkperr
1193 awkierr(int perr
, char *fmt
, va_list ap
)
1195 static char sep1
[] = "\n>>>\t";
1196 static char sep2
[] = "\t<<<";
1197 int saveerr
= errno
;
1199 (void) fprintf(stderr
, "%s: ", _cmdname
);
1201 (void) fprintf(stderr
, gettext("line %u ("),
1202 curnode
== NNULL
? 0 : curnode
->n_lineno
);
1204 (void) fprintf(stderr
, "NR=%lld): ",
1205 (INT
)exprint(varNR
));
1207 (void) fprintf(stderr
, "%s): ",
1208 phase
== BEGIN
? s_BEGIN
: s_END
);
1209 } else if (lineno
!= 0) {
1210 (void) fprintf(stderr
, gettext("file \"%s\": "), filename
);
1211 (void) fprintf(stderr
, gettext("line %u: "), lineno
);
1213 (void) vfprintf(stderr
, gettext(fmt
), ap
);
1215 (void) fprintf(stderr
, ": %s", strerror(saveerr
));
1216 if (perr
!= 2 && !running
) {
1221 (void) fprintf(stderr
, gettext(" Context is:%s"), sep1
);
1225 if (cp
>= &context
[NCONTEXT
])
1227 if ((c
= *cp
++) != '\0')
1228 (void) fputs(c
== '\n' ? sep1
: toprint(c
),
1231 (void) fputs(sep2
, stderr
);
1233 (void) fprintf(stderr
, "\n");
1242 if ((cp
= malloc(n
)) == NULL
)
1248 erealloc(wchar_t *p
, unsigned n
)
1252 if ((cp
= realloc(p
, n
)) == NULL
)
1259 * usage message for awk
1264 (void) fprintf(stderr
, gettext(
1265 "Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
1266 " awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n"));
1272 mbconvert(char *str
)
1274 static wchar_t *op
= 0;
1278 return (op
= mbstowcsdup(str
));
1282 mbunconvert(wchar_t *str
)
1284 static char *op
= 0;
1288 return (op
= wcstombsdup(str
));
1292 * Solaris port - following functions are typical MKS functions written
1293 * to work for Solaris.
1297 mbstowcsdup(char *s
)
1303 if ((w
= (wchar_t *)malloc(n
* sizeof (wchar_t))) == NULL
)
1306 if (mbstowcs(w
, s
, n
) == (size_t)-1)
1313 wcstombsdup(wchar_t *w
)
1318 /* Fetch memory for worst case string length */
1321 if ((mb
= (char *)malloc(n
)) == NULL
) {
1325 /* Convert the string */
1326 if ((n
= wcstombs(mb
, w
, n
)) == -1) {
1334 /* Shrink the string down */
1335 if ((mb
= (char *)realloc(mb
, strlen(mb
)+1)) == NULL
) {
1342 * The upe_ctrls[] table contains the printable 'control-sequences' for the
1343 * character values 0..31 and 127. The first entry is for value 127, thus the
1344 * entries for the remaining character values are from 1..32.
1346 static const char *const upe_ctrls
[] =
1349 "^@", "^A", "^B", "^C", "^D", "^E", "^F", "^G",
1350 "^H", "^I", "^J", "^K", "^L", "^M", "^N", "^O",
1351 "^P", "^Q", "^R", "^S", "^T", "^U", "^V", "^W",
1352 "^X", "^Y", "^Z", "^[", "^\\", "^]", "^^", "^_"
1357 * Return a printable string corresponding to the given character value. If
1358 * the character is printable, simply return it as the string. If it is in
1359 * the range specified by table 5-101 in the UPE, return the corresponding
1360 * string. Otherwise, return an octal escape sequence.
1367 static char mbch
[MB_LEN_MAX
+1];
1368 static char buf
[5 * MB_LEN_MAX
+ 1];
1370 if ((n
= wctomb(mbch
, c
)) == -1) {
1371 /* Should never happen */
1372 (void) sprintf(buf
, "\\%x", c
);
1378 } else if (c
== 127) {
1379 return (upe_ctrls
[0]);
1380 } else if (c
< 32) {
1381 /* Print as in Table 5-101 in the UPE */
1382 return (upe_ctrls
[c
+1]);
1384 /* Print as an octal escape sequence */
1385 for (len
= 0, ptr
= (unsigned char *) mbch
; 0 < n
; --n
, ++ptr
)
1386 len
+= sprintf(buf
+len
, "\\%03o", *ptr
);
1392 wcoff(const wchar_t *astring
, const int off
)
1394 const wchar_t *s
= astring
;
1396 char mb
[MB_LEN_MAX
];
1400 if ((n
= wctomb(mb
, *s
)) == 0)
1408 return (s
- astring
);
1412 #define NREGHOLD 1024 /* max number unused entries */
1414 static int nregunref
;
1418 struct regcache
*regcachep
;
1426 struct reghashq hash
;
1429 static struct qelem reghash
[NREGHASH
], reglink
;
1432 * Generate a hash value of the given wchar string.
1433 * The hashing method is similar to what Java does for strings.
1436 regtxthash(const wchar_t *str
)
1440 while (*str
!= L
'\0')
1441 k
= (31 * k
) + *str
++;
1448 return (k
% NREGHASH
);
1452 int_regwcomp(REGEXP
*r
, const wchar_t *pattern
)
1459 struct regcache
*rcp
;
1461 key
= regtxthash(pattern
);
1462 for (qp
= reghash
[key
].q_forw
; qp
!= NULL
; qp
= qp
->q_forw
) {
1463 rcp
= ((struct reghashq
*)qp
)->regcachep
;
1464 if (*rcp
->pattern
== *pattern
&&
1465 wcscmp(rcp
->pattern
, pattern
) == 0)
1469 /* update link. put this one at the beginning */
1470 if (rcp
!= (struct regcache
*)reglink
.q_forw
) {
1472 insque(&rcp
->lq
, &reglink
);
1474 if (rcp
->refcnt
== 0)
1475 nregunref
--; /* no longer unref'ed */
1477 *(struct regcache
**)r
= rcp
;
1481 if ((mbpattern
= wcstombsdup((wchar_t *)pattern
)) == NULL
)
1482 return (REG_ESPACE
);
1484 ret
= regcomp(&re
, mbpattern
, REG_EXTENDED
);
1491 if ((rcp
= malloc(sizeof (struct regcache
))) == NULL
)
1492 return (REG_ESPACE
);
1494 if ((rcp
->pattern
= wsdup(pattern
)) == NULL
) {
1497 return (REG_ESPACE
);
1500 insque(&rcp
->lq
, &reglink
);
1501 insque(&rcp
->hash
.hq
, &reghash
[key
]);
1502 rcp
->hash
.regcachep
= rcp
;
1504 *(struct regcache
**)r
= rcp
;
1509 int_regwfree(REGEXP r
)
1512 struct qelem
*qp
, *nqp
;
1513 struct regcache
*rcp
;
1515 rcp
= (struct regcache
*)r
;
1517 if (--rcp
->refcnt
!= 0)
1520 /* this cache has no reference */
1521 if (++nregunref
< NREGHOLD
)
1525 * We've got too much unref'ed regex. Free half of least
1529 for (qp
= reglink
.q_forw
; qp
!= NULL
; qp
= nqp
) {
1531 rcp
= (struct regcache
*)qp
;
1532 if (rcp
->refcnt
!= 0)
1535 /* free half of them */
1536 if (++cnt
< (NREGHOLD
/ 2))
1539 /* detach and free */
1541 remque(&rcp
->hash
.hq
);
1553 int_regwerror(int errcode
, REGEXP r
, char *errbuf
, size_t bufsiz
)
1555 struct regcache
*rcp
;
1557 rcp
= (struct regcache
*)r
;
1558 return (regerror(errcode
, &rcp
->re
, errbuf
, bufsiz
));
1562 int_regwexec(REGEXP r
, /* compiled RE */
1563 const wchar_t *astring
, /* subject string */
1564 size_t nsub
, /* number of subexpressions */
1565 int_regwmatch_t
*sub
, /* subexpression pointers */
1569 regmatch_t
*mbsub
= NULL
;
1571 struct regcache
*rcp
;
1573 if ((mbs
= wcstombsdup((wchar_t *)astring
)) == NULL
)
1574 return (REG_ESPACE
);
1576 if (nsub
> 0 && sub
) {
1577 if ((mbsub
= malloc(nsub
* sizeof (regmatch_t
))) == NULL
)
1578 return (REG_ESPACE
);
1581 rcp
= (struct regcache
*)r
;
1583 i
= regexec(&rcp
->re
, mbs
, nsub
, mbsub
, flags
);
1585 /* Now, adjust the pointers/counts in sub */
1586 if (i
== REG_OK
&& nsub
> 0 && mbsub
) {
1589 for (j
= 0; j
< nsub
; j
++) {
1590 regmatch_t
*ms
= &mbsub
[j
];
1591 int_regwmatch_t
*ws
= &sub
[j
];
1593 if ((k
= ms
->rm_so
) >= 0) {
1594 ws
->rm_so
= wcoff(astring
, k
);
1595 ws
->rm_sp
= astring
+ ws
->rm_so
;
1597 if ((k
= ms
->rm_eo
) >= 0) {
1598 ws
->rm_eo
= wcoff(astring
, k
);
1599 ws
->rm_ep
= astring
+ ws
->rm_eo
;
1611 int_regwdosuba(REGEXP rp
, /* compiled RE: Pattern */
1612 const wchar_t *rpl
, /* replacement string: /rpl/ */
1613 const wchar_t *src
, /* source string */
1614 wchar_t **dstp
, /* destination string */
1615 int len
, /* destination length */
1616 int *globp
) /* IN: occurrence, 0 for all; OUT: substitutions */
1618 wchar_t *dst
, *odst
;
1619 const wchar_t *ip
, *xp
;
1623 int glob
, iglob
= *globp
, oglob
= 0;
1625 int_regwmatch_t rm
[NSUB
], *rmp
;
1630 /* handle overflow of dst. we need "i" more bytes */
1633 #define OVERFLOW(i) { \
1634 int pos = op - dst; \
1635 dst = (wchar_t *)realloc(odst = dst, \
1636 (len += len + i) * sizeof (wchar_t)); \
1644 *dstp
= dst
= (wchar_t *)malloc(len
* sizeof (wchar_t));
1646 return (REG_ESPACE
);
1648 if (rp
== NULL
|| rpl
== NULL
|| src
== NULL
|| dst
== NULL
)
1649 return (REG_EFATAL
);
1651 glob
= 0; /* match count */
1652 ip
= src
; /* source position */
1653 op
= dst
; /* destination position */
1657 while ((regerr
= int_regwexec(rp
, ip
, NSUB
, rm
, flags
)) == REG_OK
) {
1658 /* Copy text preceding match */
1659 if (op
+ (i
= rm
[0].rm_sp
- ip
) >= end
)
1664 if (iglob
== 0 || ++glob
== iglob
) {
1666 xp
= rpl
; /* do substitute */
1668 xp
= L
"&"; /* preserve text */
1670 /* Perform replacement of matched substring */
1671 while ((c
= *xp
++) != '\0') {
1675 else if (c
== '\\') {
1676 if ('0' <= *xp
&& *xp
<= '9')
1677 rmp
= &rm
[*xp
++ - '0'];
1678 else if (*xp
!= '\0')
1682 if (rmp
== NULL
) { /* Ordinary character. */
1686 } else if (rmp
->rm_sp
!= NULL
&& rmp
->rm_ep
!= NULL
) {
1688 if (op
+ (i
= rmp
->rm_ep
- rmp
->rm_sp
) >= end
)
1696 if (*ip
== '\0') /* If at end break */
1698 else if (rm
[0].rm_sp
== rm
[0].rm_ep
) {
1699 /* If empty match copy next char */
1707 if (regerr
!= REG_OK
&& regerr
!= REG_NOMATCH
)
1710 /* Copy rest of text */
1711 if (op
+ (i
= wcslen(ip
)) >= end
)
1717 if ((*dstp
= dst
= (wchar_t *)realloc(odst
= dst
,
1718 sizeof (wchar_t) * (size_t)(op
- dst
))) == NULL
) {
1721 return (REG_ESPACE
);
1726 return ((oglob
== 0) ? REG_NOMATCH
: REG_OK
);