8672 proc_t changes broke genunix dmods and walker
[unleashed.git] / usr / src / cmd / sed / process.c
blobf5c8cf0b25f315658b05b9808018737918630f19
1 /*
2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
3 * Copyright (c) 1992 Diomidis Spinellis.
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis of Imperial College, University of London.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/types.h>
36 #include <sys/stat.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <limits.h>
43 #include <regex.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 #include <termio.h>
51 #include <libintl.h>
52 #include <note.h>
54 #include "defs.h"
55 #include "extern.h"
/*
 * Working buffers: HS is the hold space, PS the pattern space, SS the
 * scratch space substitute() builds its result in, and YS the scratch
 * space do_tr() builds multi-byte translations in.
 */
57 static SPACE HS, PS, SS, YS;
/* Shorthand for the pattern space and hold space fields used below. */
58 #define pd PS.deleted
59 #define ps PS.space
60 #define psl PS.len
61 #define hs HS.space
62 #define hsl HS.len
/* Prototypes for the file-local helpers defined later in this file. */
64 static int applies(struct s_command *);
65 static void do_tr(struct s_tr *);
66 static void flush_appends(void);
67 static void lputs(char *, size_t);
68 static int regexec_e(regex_t *, const char *, int, int, size_t);
69 static void regsub(SPACE *, char *, char *);
70 static int substitute(struct s_command *);
/* Queue of pending 'a'/'r' output, drained by flush_appends(). */
72 struct s_appends *appends; /* Array of pointers to strings to append. */
73 static int appendx; /* Index into appends array. */
74 int appendnum; /* Size of appends array. */
76 static int lastaddr; /* Set by applies if last address of a range. */
77 static int sdone; /* If any substitutes since last line input. */
78 /* Last RE handed to regexec_e(); reused when it is called with NULL. */
79 static regex_t *defpreg;
/* regexec_e() asks regexec() for maxnsub + 1 entries in match[]. */
80 size_t maxnsub;
/* Match offsets filled in by regexec_e() and read by substitute()/regsub(). */
81 regmatch_t *match;
/* Write the pattern space followed by a newline to outfile. */
83 #define OUT() do { \
84 (void) fwrite(ps, 1, psl, outfile); \
85 (void) fputc('\n', outfile); \
86 _NOTE(CONSTCOND) \
87 } while (0)
89 void
90 process(void)
92 struct s_command *cp;
93 SPACE tspace;
94 size_t oldpsl = 0;
95 char *p;
97 p = NULL;
99 for (linenum = 0; mf_fgets(&PS, REPLACE); /* NOP */) {
100 pd = 0;
101 top:
102 cp = prog;
103 redirect:
104 while (cp != NULL) {
105 if (!applies(cp)) {
106 cp = cp->next;
107 continue;
109 switch (cp->code) {
110 case '{':
111 cp = cp->u.c;
112 goto redirect;
113 case 'a':
114 if (appendx >= appendnum)
115 if ((appends = realloc(appends,
116 sizeof (struct s_appends) *
117 (appendnum *= 2))) == NULL)
118 err(1, "realloc");
119 appends[appendx].type = AP_STRING;
120 appends[appendx].s = cp->t;
121 appends[appendx].len = strlen(cp->t);
122 appendx++;
123 break;
124 case 'b':
125 cp = cp->u.c;
126 goto redirect;
127 case 'c':
128 pd = 1;
129 psl = 0;
130 if (cp->a2 == NULL || lastaddr || lastline())
131 (void) fprintf(outfile, "%s", cp->t);
132 break;
133 case 'd':
134 pd = 1;
135 goto new;
136 case 'D':
137 if (pd)
138 goto new;
139 if (psl == 0 ||
140 (p = memchr(ps, '\n', psl)) == NULL) {
141 pd = 1;
142 goto new;
143 } else {
144 psl -=
145 (uintptr_t)(p + 1) - (uintptr_t)ps;
146 (void) memmove(ps, p + 1, psl);
147 goto top;
149 case 'g':
150 cspace(&PS, hs, hsl, REPLACE);
151 break;
152 case 'G':
153 cspace(&PS, "\n", 1, APPEND);
154 cspace(&PS, hs, hsl, APPEND);
155 break;
156 case 'h':
157 cspace(&HS, ps, psl, REPLACE);
158 break;
159 case 'H':
160 cspace(&HS, "\n", 1, APPEND);
161 cspace(&HS, ps, psl, APPEND);
162 break;
163 case 'i':
164 (void) fprintf(outfile, "%s", cp->t);
165 break;
166 case 'l':
167 lputs(ps, psl);
168 break;
169 case 'n':
170 if (!nflag && !pd)
171 OUT();
172 flush_appends();
173 if (!mf_fgets(&PS, REPLACE))
174 exit(0);
175 pd = 0;
176 break;
177 case 'N':
178 flush_appends();
179 cspace(&PS, "\n", 1, APPEND);
180 if (!mf_fgets(&PS, APPEND))
181 exit(0);
182 break;
183 case 'p':
184 if (pd)
185 break;
186 OUT();
187 break;
188 case 'P':
189 if (pd)
190 break;
191 if ((p = memchr(ps, '\n', psl)) != NULL) {
192 oldpsl = psl;
193 psl = (uintptr_t)p - (uintptr_t)ps;
195 OUT();
196 if (p != NULL)
197 psl = oldpsl;
198 break;
199 case 'q':
200 if (!nflag && !pd)
201 OUT();
202 flush_appends();
203 exit(0);
204 /*NOTREACHED*/
205 case 'r':
206 if (appendx >= appendnum)
207 if ((appends = realloc(appends,
208 sizeof (struct s_appends) *
209 (appendnum *= 2))) == NULL)
210 err(1, "realloc");
211 appends[appendx].type = AP_FILE;
212 appends[appendx].s = cp->t;
213 appends[appendx].len = strlen(cp->t);
214 appendx++;
215 break;
216 case 's':
217 sdone |= substitute(cp);
218 break;
219 case 't':
220 if (sdone) {
221 sdone = 0;
222 cp = cp->u.c;
223 goto redirect;
225 break;
226 case 'w':
227 if (pd)
228 break;
229 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
230 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666))
231 == -1)
232 err(1, "%s", cp->t);
233 if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
234 write(cp->u.fd, "\n", 1) != 1)
235 err(1, "%s", cp->t);
236 break;
237 case 'x':
239 * If the hold space is null, make it empty
240 * but not null. Otherwise the pattern space
241 * will become null after the swap, which is
242 * an abnormal condition.
244 if (hs == NULL)
245 cspace(&HS, "", 0, REPLACE);
246 tspace = PS;
247 PS = HS;
248 HS = tspace;
249 break;
250 case 'y':
251 if (pd || psl == 0)
252 break;
253 do_tr(cp->u.y);
254 break;
255 case ':':
256 case '}':
257 break;
258 case '=':
259 (void) fprintf(outfile, "%lu\n", linenum);
261 cp = cp->next;
262 } /* for all cp */
264 new: if (!nflag && !pd)
265 OUT();
266 flush_appends();
267 } /* for all lines */
271 * TRUE if the address passed matches the current program state
272 * (lastline, linenumber, ps).
274 #define MATCH(a) \
275 ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
276 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
279 * Return TRUE if the command applies to the current line. Sets the start
280 * line for process ranges. Interprets the non-select (``!'') flag.
282 static int
283 applies(struct s_command *cp)
285 int r;
287 lastaddr = 0;
288 if (cp->a1 == NULL && cp->a2 == NULL)
289 r = 1;
290 else if (cp->a2)
291 if (cp->startline > 0) {
292 if (MATCH(cp->a2)) {
293 cp->startline = 0;
294 lastaddr = 1;
295 r = 1;
296 } else if (linenum - cp->startline <= cp->a2->u.l)
297 r = 1;
298 else if ((cp->a2->type == AT_LINE &&
299 linenum > cp->a2->u.l) ||
300 (cp->a2->type == AT_RELLINE &&
301 linenum - cp->startline > cp->a2->u.l)) {
303 * We missed the 2nd address due to a branch,
304 * so just close the range and return false.
306 cp->startline = 0;
307 r = 0;
308 } else
309 r = 1;
310 } else if (MATCH(cp->a1)) {
312 * If the second address is a number less than or
313 * equal to the line number first selected, only
314 * one line shall be selected.
315 * -- POSIX 1003.2
316 * Likewise if the relative second line address is zero.
318 if ((cp->a2->type == AT_LINE &&
319 linenum >= cp->a2->u.l) ||
320 (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
321 lastaddr = 1;
322 else {
323 cp->startline = linenum;
325 r = 1;
326 } else
327 r = 0;
328 else
329 r = MATCH(cp->a1);
330 return (cp->nonsel ? ! r : r);
334 * Reset the sed processor to its initial state.
336 void
337 resetstate(void)
339 struct s_command *cp;
342 * Reset all in-range markers.
344 for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
345 if (cp->a2)
346 cp->startline = 0;
349 * Clear out the hold space.
351 cspace(&HS, "", 0, REPLACE);
355 * substitute --
356 * Do substitutions in the pattern space. Currently, we build a
357 * copy of the new pattern space in the substitute space structure
358 * and then swap them.
360 static int
361 substitute(struct s_command *cp)
363 SPACE tspace;
364 regex_t *re;
365 regoff_t re_off, slen;
366 int lastempty, n;
367 char *s;
369 s = ps;
370 re = cp->u.s->re;
371 if (re == NULL) {
372 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
373 linenum = cp->u.s->linenum;
374 fatal(_("\\%u not defined in the RE"),
375 cp->u.s->maxbref);
378 if (!regexec_e(re, s, 0, 0, psl))
379 return (0);
381 SS.len = 0; /* Clean substitute space. */
382 slen = psl;
383 n = cp->u.s->n;
384 lastempty = 1;
386 switch (n) {
387 case 0: /* Global */
388 do {
389 if (lastempty || match[0].rm_so != match[0].rm_eo) {
390 /* Locate start of replaced string. */
391 re_off = match[0].rm_so;
392 /* Copy leading retained string. */
393 cspace(&SS, s, re_off, APPEND);
394 /* Add in regular expression. */
395 regsub(&SS, s, cp->u.s->new);
398 /* Move past this match. */
399 if (match[0].rm_so != match[0].rm_eo) {
400 s += match[0].rm_eo;
401 slen -= match[0].rm_eo;
402 lastempty = 0;
403 } else {
404 if (match[0].rm_so < slen)
405 cspace(&SS, s + match[0].rm_so, 1,
406 APPEND);
407 s += match[0].rm_so + 1;
408 slen -= match[0].rm_so + 1;
409 lastempty = 1;
411 } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
412 /* Copy trailing retained string. */
413 if (slen > 0)
414 cspace(&SS, s, slen, APPEND);
415 break;
416 default: /* Nth occurrence */
417 while (--n) {
418 if (match[0].rm_eo == match[0].rm_so)
419 match[0].rm_eo = match[0].rm_so + 1;
420 s += match[0].rm_eo;
421 slen -= match[0].rm_eo;
422 if (slen < 0)
423 return (0);
424 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
425 return (0);
427 /* FALLTHROUGH */
428 case 1: /* 1st occurrence */
429 /* Locate start of replaced string. */
430 re_off = match[0].rm_so + ((uintptr_t)s - (uintptr_t)ps);
431 /* Copy leading retained string. */
432 cspace(&SS, ps, re_off, APPEND);
433 /* Add in regular expression. */
434 regsub(&SS, s, cp->u.s->new);
435 /* Copy trailing retained string. */
436 s += match[0].rm_eo;
437 slen -= match[0].rm_eo;
438 cspace(&SS, s, slen, APPEND);
439 break;
443 * Swap the substitute space and the pattern space, and make sure
444 * that any leftover pointers into stdio memory get lost.
446 tspace = PS;
447 PS = SS;
448 SS = tspace;
449 SS.space = SS.back;
451 /* Handle the 'p' flag. */
452 if (cp->u.s->p)
453 OUT();
455 /* Handle the 'w' flag. */
456 if (cp->u.s->wfile && !pd) {
457 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
458 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1)
459 err(1, "%s", cp->u.s->wfile);
460 if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
461 write(cp->u.s->wfd, "\n", 1) != 1)
462 err(1, "%s", cp->u.s->wfile);
464 return (1);
468 * do_tr --
469 * Perform translation ('y' command) in the pattern space.
471 static void
472 do_tr(struct s_tr *y)
474 SPACE tmp;
475 char c, *p;
476 size_t clen, left;
477 int i;
479 if (MB_CUR_MAX == 1) {
481 * Single-byte encoding: perform in-place translation
482 * of the pattern space.
484 for (p = ps; p < &ps[psl]; p++)
485 *p = y->bytetab[(uchar_t)*p];
486 } else {
488 * Multi-byte encoding: perform translation into the
489 * translation space, then swap the translation and
490 * pattern spaces.
492 /* Clean translation space. */
493 YS.len = 0;
494 for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
495 if ((c = y->bytetab[(uchar_t)*p]) != '\0') {
496 cspace(&YS, &c, 1, APPEND);
497 clen = 1;
498 continue;
500 for (i = 0; i < y->nmultis; i++)
501 if (left >= y->multis[i].fromlen &&
502 memcmp(p, y->multis[i].from,
503 y->multis[i].fromlen) == 0)
504 break;
505 if (i < y->nmultis) {
506 cspace(&YS, y->multis[i].to,
507 y->multis[i].tolen, APPEND);
508 clen = y->multis[i].fromlen;
509 } else {
510 cspace(&YS, p, 1, APPEND);
511 clen = 1;
514 /* Swap the translation space and the pattern space. */
515 tmp = PS;
516 PS = YS;
517 YS = tmp;
518 YS.space = YS.back;
523 * Flush append requests. Always called before reading a line,
524 * therefore it also resets the substitution done (sdone) flag.
526 static void
527 flush_appends(void)
529 FILE *f;
530 int count, i;
531 char buf[8 * 1024];
533 for (i = 0; i < appendx; i++)
534 switch (appends[i].type) {
535 case AP_STRING:
536 (void) fwrite(appends[i].s, sizeof (char),
537 appends[i].len, outfile);
538 break;
539 case AP_FILE:
541 * Read files probably shouldn't be cached. Since
542 * it's not an error to read a non-existent file,
543 * it's possible that another program is interacting
544 * with the sed script through the filesystem. It
545 * would be truly bizarre, but possible. It's probably
546 * not that big a performance win, anyhow.
548 if ((f = fopen(appends[i].s, "r")) == NULL)
549 break;
550 while ((count =
551 fread(buf, sizeof (char), sizeof (buf), f)))
552 (void) fwrite(buf, sizeof (char), count,
553 outfile);
554 (void) fclose(f);
555 break;
557 if (ferror(outfile))
558 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
559 appendx = sdone = 0;
562 static void
563 lputs(char *s, size_t len)
565 static const char escapes[] = "\\\a\b\f\r\t\v";
566 int c, col, width;
567 const char *p;
568 struct winsize win;
569 static int termwidth = -1;
570 size_t clen, i;
571 wchar_t wc;
572 mbstate_t mbs;
574 if (outfile != stdout)
575 termwidth = 60;
576 if (termwidth == -1) {
577 if (((p = getenv("COLUMNS")) != NULL) && (*p != '\0'))
578 termwidth = atoi(p);
579 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
580 win.ws_col > 0)
581 termwidth = win.ws_col;
582 else
583 termwidth = 60;
585 if (termwidth <= 0)
586 termwidth = 1;
588 (void) memset(&mbs, 0, sizeof (mbs));
589 col = 0;
590 while (len != 0) {
591 clen = mbrtowc(&wc, s, len, &mbs);
592 if (clen == 0)
593 clen = 1;
594 if (clen == (size_t)-1 || clen == (size_t)-2) {
595 wc = (unsigned char)*s;
596 clen = 1;
597 (void) memset(&mbs, 0, sizeof (mbs));
599 if (wc == '\n') {
600 if (col + 1 >= termwidth)
601 (void) fprintf(outfile, "\\\n");
602 (void) fputc('$', outfile);
603 (void) fputc('\n', outfile);
604 col = 0;
605 } else if (iswprint(wc)) {
606 width = wcwidth(wc);
607 if (col + width >= termwidth) {
608 (void) fprintf(outfile, "\\\n");
609 col = 0;
611 (void) fwrite(s, 1, clen, outfile);
612 col += width;
613 } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
614 (p = strchr(escapes, c)) != NULL) {
615 if (col + 2 >= termwidth) {
616 (void) fprintf(outfile, "\\\n");
617 col = 0;
619 (void) fprintf(outfile, "\\%c",
620 "\\abfrtv"[(uintptr_t)p - (uintptr_t)escapes]);
621 col += 2;
622 } else {
623 if (col + 4 * clen >= (unsigned)termwidth) {
624 (void) fprintf(outfile, "\\\n");
625 col = 0;
627 for (i = 0; i < clen; i++)
628 (void) fprintf(outfile, "\\%03o",
629 (int)(unsigned char)s[i]);
630 col += 4 * clen;
632 s += clen;
633 len -= clen;
635 if (col + 1 >= termwidth)
636 (void) fprintf(outfile, "\\\n");
637 (void) fputc('$', outfile);
638 (void) fputc('\n', outfile);
639 if (ferror(outfile))
640 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
643 static int
644 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
645 size_t slen)
647 int eval;
649 if (preg == NULL) {
650 if (defpreg == NULL)
651 fatal(_("first RE may not be empty"));
652 } else
653 defpreg = preg;
655 /* Set anchors */
656 match[0].rm_so = 0;
657 match[0].rm_eo = slen;
659 eval = regexec(defpreg, string,
660 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
661 switch (eval) {
662 case 0:
663 return (1);
664 case REG_NOMATCH:
665 return (0);
667 fatal(_("RE error: %s"), strregerror(eval, defpreg));
668 return (0);
672 * regsub - perform substitutions after a regexp match
673 * Based on a routine by Henry Spencer
675 static void
676 regsub(SPACE *sp, char *string, char *src)
678 int len, no;
679 char c, *dst;
681 #define NEEDSP(reqlen) \
682 /* XXX What is the +1 for? */ \
683 if (sp->len + (reqlen) + 1 >= sp->blen) { \
684 sp->blen += (reqlen) + 1024; \
685 if ((sp->back = realloc(sp->back, sp->blen)) == NULL) \
686 err(1, "realloc"); \
687 sp->space = sp->back; \
688 dst = sp->space + sp->len; \
691 dst = sp->space + sp->len;
692 while ((c = *src++) != '\0') {
693 if (c == '&')
694 no = 0;
695 else if (c == '\\' && isdigit((unsigned char)*src))
696 no = *src++ - '0';
697 else
698 no = -1;
699 if (no < 0) { /* Ordinary character. */
700 if (c == '\\' && (*src == '\\' || *src == '&'))
701 c = *src++;
702 NEEDSP(1);
703 *dst++ = c;
704 ++sp->len;
705 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
706 len = match[no].rm_eo - match[no].rm_so;
707 NEEDSP(len);
708 (void) memmove(dst, string + match[no].rm_so, len);
709 dst += len;
710 sp->len += len;
713 NEEDSP(1);
714 *dst = '\0';
718 * cspace --
719 * Concatenate space: append the source space to the destination space,
720 * allocating new space as necessary.
722 void
723 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
725 size_t tlen;
727 /* Make sure SPACE has enough memory and ramp up quickly. */
728 tlen = sp->len + len + 1;
729 if (tlen > sp->blen) {
730 sp->blen = tlen + 1024;
731 if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
732 NULL)
733 err(1, "realloc");
736 if (spflag == REPLACE)
737 sp->len = 0;
739 (void) memmove(sp->space + sp->len, p, len);
741 sp->space[sp->len += len] = '\0';
745 * Close all cached opened files and report any errors
747 void
748 cfclose(struct s_command *cp, struct s_command *end)
751 for (; cp != end; cp = cp->next)
752 switch (cp->code) {
753 case 's':
754 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
755 err(1, "%s", cp->u.s->wfile);
756 cp->u.s->wfd = -1;
757 break;
758 case 'w':
759 if (cp->u.fd != -1 && close(cp->u.fd))
760 err(1, "%s", cp->t);
761 cp->u.fd = -1;
762 break;
763 case '{':
764 cfclose(cp->u.c, cp->next);
765 break;