If there is no commitid, we need to initialize Delta->commitid to NULL to
[dragonfly.git] / usr.bin / sed / process.c
blob7c27d9489d71924c7ddf4f834a75b624b589b260
1 /*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
37 * @(#)process.c 8.6 (Berkeley) 4/20/94
38 * $FreeBSD: src/usr.bin/sed/process.c,v 1.10.2.10 2003/06/13 07:32:08 fanf Exp $
39 * $DragonFly: src/usr.bin/sed/process.c,v 1.5 2003/11/04 15:48:16 drhodus Exp $
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <sys/ioctl.h>
45 #include <sys/uio.h>
47 #include <ctype.h>
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <limits.h>
52 #include <regex.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
58 #include "defs.h"
59 #include "extern.h"
61 static SPACE HS, PS, SS;
62 #define pd PS.deleted
63 #define ps PS.space
64 #define psl PS.len
65 #define hs HS.space
66 #define hsl HS.len
68 static __inline int applies(struct s_command *);
69 static void flush_appends(void);
70 static void lputs(char *);
71 static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
72 static void regsub(SPACE *, char *, char *);
73 static int substitute(struct s_command *);
75 struct s_appends *appends; /* Array of pointers to strings to append. */
76 static int appendx; /* Index into appends array. */
77 int appendnum; /* Size of appends array. */
79 static int lastaddr; /* Set by applies if last address of a range. */
80 static int sdone; /* If any substitutes since last line input. */
81 /* Iov structure for 'w' commands. */
82 static regex_t *defpreg;
83 size_t maxnsub;
84 regmatch_t *match;
86 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); putchar('\n'); }
88 void
89 process(void)
91 struct s_command *cp;
92 SPACE tspace;
93 size_t len, oldpsl = 0;
94 char *p;
96 p = NULL;
98 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
99 pd = 0;
100 top:
101 cp = prog;
102 redirect:
103 while (cp != NULL) {
104 if (!applies(cp)) {
105 cp = cp->next;
106 continue;
108 switch (cp->code) {
109 case '{':
110 cp = cp->u.c;
111 goto redirect;
112 case 'a':
113 if (appendx >= appendnum)
114 if ((appends = realloc(appends,
115 sizeof(struct s_appends) *
116 (appendnum *= 2))) == NULL)
117 err(1, "realloc");
118 appends[appendx].type = AP_STRING;
119 appends[appendx].s = cp->t;
120 appends[appendx].len = strlen(cp->t);
121 appendx++;
122 break;
123 case 'b':
124 cp = cp->u.c;
125 goto redirect;
126 case 'c':
127 pd = 1;
128 psl = 0;
129 if (cp->a2 == NULL || lastaddr)
130 (void)printf("%s", cp->t);
131 break;
132 case 'd':
133 pd = 1;
134 goto new;
135 case 'D':
136 if (pd)
137 goto new;
138 if (psl == 0 ||
139 (p = memchr(ps, '\n', psl)) == NULL) {
140 pd = 1;
141 goto new;
142 } else {
143 psl -= (p + 1) - ps;
144 memmove(ps, p + 1, psl);
145 goto top;
147 case 'g':
148 cspace(&PS, hs, hsl, REPLACE);
149 break;
150 case 'G':
151 cspace(&PS, "\n", 1, 0);
152 cspace(&PS, hs, hsl, 0);
153 break;
154 case 'h':
155 cspace(&HS, ps, psl, REPLACE);
156 break;
157 case 'H':
158 cspace(&HS, "\n", 1, 0);
159 cspace(&HS, ps, psl, 0);
160 break;
161 case 'i':
162 (void)printf("%s", cp->t);
163 break;
164 case 'l':
165 lputs(ps);
166 break;
167 case 'n':
168 if (!nflag && !pd)
169 OUT(ps)
170 flush_appends();
171 if (!mf_fgets(&PS, REPLACE))
172 exit(0);
173 pd = 0;
174 break;
175 case 'N':
176 flush_appends();
177 cspace(&PS, "\n", 1, 0);
178 if (!mf_fgets(&PS, 0))
179 exit(0);
180 break;
181 case 'p':
182 if (pd)
183 break;
184 OUT(ps)
185 break;
186 case 'P':
187 if (pd)
188 break;
189 if (psl != 0 &&
190 (p = memchr(ps, '\n', psl)) != NULL) {
191 oldpsl = psl;
192 psl = p - ps;
194 OUT(ps)
195 if (p != NULL)
196 psl = oldpsl;
197 break;
198 case 'q':
199 if (!nflag && !pd)
200 OUT(ps)
201 flush_appends();
202 exit(0);
203 case 'r':
204 if (appendx >= appendnum)
205 if ((appends = realloc(appends,
206 sizeof(struct s_appends) *
207 (appendnum *= 2))) == NULL)
208 err(1, "realloc");
209 appends[appendx].type = AP_FILE;
210 appends[appendx].s = cp->t;
211 appends[appendx].len = strlen(cp->t);
212 appendx++;
213 break;
214 case 's':
215 sdone |= substitute(cp);
216 break;
217 case 't':
218 if (sdone) {
219 sdone = 0;
220 cp = cp->u.c;
221 goto redirect;
223 break;
224 case 'w':
225 if (pd)
226 break;
227 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
228 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
229 DEFFILEMODE)) == -1)
230 err(1, "%s", cp->t);
231 if (write(cp->u.fd, ps, psl) != psl ||
232 write(cp->u.fd, "\n", 1) != 1)
233 err(1, "%s", cp->t);
234 break;
235 case 'x':
236 if (hs == NULL)
237 cspace(&HS, "", 0, REPLACE);
238 tspace = PS;
239 PS = HS;
240 HS = tspace;
241 break;
242 case 'y':
243 if (pd || psl == 0)
244 break;
245 for (p = ps, len = psl; len--; ++p)
246 *p = cp->u.y[(unsigned char)*p];
247 break;
248 case ':':
249 case '}':
250 break;
251 case '=':
252 (void)printf("%lu\n", linenum);
254 cp = cp->next;
255 } /* for all cp */
257 new: if (!nflag && !pd)
258 OUT(ps)
259 flush_appends();
260 } /* for all lines */
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * The expansion is fully parenthesized so that the macro can be used as an
 * operand of a larger expression (for example after `!' or next to `&&').
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
272 * Return TRUE if the command applies to the current line. Sets the inrange
273 * flag to process ranges. Interprets the non-select (``!'') flag.
275 static __inline int
276 applies(struct s_command *cp)
278 int r;
280 lastaddr = 0;
281 if (cp->a1 == NULL && cp->a2 == NULL)
282 r = 1;
283 else if (cp->a2)
284 if (cp->inrange) {
285 if (MATCH(cp->a2)) {
286 cp->inrange = 0;
287 lastaddr = 1;
289 r = 1;
290 } else if (MATCH(cp->a1)) {
292 * If the second address is a number less than or
293 * equal to the line number first selected, only
294 * one line shall be selected.
295 * -- POSIX 1003.2
297 if (cp->a2->type == AT_LINE &&
298 linenum >= cp->a2->u.l)
299 lastaddr = 1;
300 else
301 cp->inrange = 1;
302 r = 1;
303 } else
304 r = 0;
305 else
306 r = MATCH(cp->a1);
307 return (cp->nonsel ? ! r : r);
311 * substitute --
312 * Do substitutions in the pattern space. Currently, we build a
313 * copy of the new pattern space in the substitute space structure
314 * and then swap them.
316 static int
317 substitute(struct s_command *cp)
319 SPACE tspace;
320 regex_t *re;
321 regoff_t re_off, slen;
322 int lastempty, n;
323 char *s;
325 s = ps;
326 re = cp->u.s->re;
327 if (re == NULL) {
328 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
329 linenum = cp->u.s->linenum;
330 errx(1, "%lu: %s: \\%d not defined in the RE",
331 linenum, fname, cp->u.s->maxbref);
334 if (!regexec_e(re, s, 0, 0, psl))
335 return (0);
337 SS.len = 0; /* Clean substitute space. */
338 slen = psl;
339 n = cp->u.s->n;
340 lastempty = 1;
342 switch (n) {
343 case 0: /* Global */
344 do {
345 if (lastempty || match[0].rm_so != match[0].rm_eo) {
346 /* Locate start of replaced string. */
347 re_off = match[0].rm_so;
348 /* Copy leading retained string. */
349 cspace(&SS, s, re_off, APPEND);
350 /* Add in regular expression. */
351 regsub(&SS, s, cp->u.s->new);
354 /* Move past this match. */
355 if (match[0].rm_so != match[0].rm_eo) {
356 s += match[0].rm_eo;
357 slen -= match[0].rm_eo;
358 lastempty = 0;
359 } else {
360 if (match[0].rm_so < slen)
361 cspace(&SS, s + match[0].rm_so, 1,
362 APPEND);
363 s += match[0].rm_so + 1;
364 slen -= match[0].rm_so + 1;
365 lastempty = 1;
367 } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
368 /* Copy trailing retained string. */
369 if (slen > 0)
370 cspace(&SS, s, slen, APPEND);
371 break;
372 default: /* Nth occurrence */
373 while (--n) {
374 if (match[0].rm_eo == match[0].rm_so)
375 match[0].rm_eo = match[0].rm_so + 1;
376 s += match[0].rm_eo;
377 slen -= match[0].rm_eo;
378 if (slen < 0)
379 return (0);
380 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
381 return (0);
383 /* FALLTHROUGH */
384 case 1: /* 1st occurrence */
385 /* Locate start of replaced string. */
386 re_off = match[0].rm_so + (s - ps);
387 /* Copy leading retained string. */
388 cspace(&SS, ps, re_off, APPEND);
389 /* Add in regular expression. */
390 regsub(&SS, s, cp->u.s->new);
391 /* Copy trailing retained string. */
392 s += match[0].rm_eo;
393 slen -= match[0].rm_eo;
394 cspace(&SS, s, slen, APPEND);
395 break;
399 * Swap the substitute space and the pattern space, and make sure
400 * that any leftover pointers into stdio memory get lost.
402 tspace = PS;
403 PS = SS;
404 SS = tspace;
405 SS.space = SS.back;
407 /* Handle the 'p' flag. */
408 if (cp->u.s->p)
409 OUT(ps)
411 /* Handle the 'w' flag. */
412 if (cp->u.s->wfile && !pd) {
413 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
414 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
415 err(1, "%s", cp->u.s->wfile);
416 if (write(cp->u.s->wfd, ps, psl) != psl ||
417 write(cp->u.s->wfd, "\n", 1) != 1)
418 err(1, "%s", cp->u.s->wfile);
420 return (1);
424 * Flush append requests. Always called before reading a line,
425 * therefore it also resets the substitution done (sdone) flag.
427 static void
428 flush_appends(void)
430 FILE *f;
431 int count, i;
432 char buf[8 * 1024];
434 for (i = 0; i < appendx; i++)
435 switch (appends[i].type) {
436 case AP_STRING:
437 fwrite(appends[i].s, sizeof(char), appends[i].len,
438 stdout);
439 break;
440 case AP_FILE:
442 * Read files probably shouldn't be cached. Since
443 * it's not an error to read a non-existent file,
444 * it's possible that another program is interacting
445 * with the sed script through the filesystem. It
446 * would be truly bizarre, but possible. It's probably
447 * not that big a performance win, anyhow.
449 if ((f = fopen(appends[i].s, "r")) == NULL)
450 break;
451 while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
452 (void)fwrite(buf, sizeof(char), count, stdout);
453 (void)fclose(f);
454 break;
456 if (ferror(stdout))
457 errx(1, "stdout: %s", strerror(errno ? errno : EIO));
458 appendx = sdone = 0;
461 static void
462 lputs(char *s)
464 int count;
465 const char *escapes;
466 char *p;
467 struct winsize win;
468 static int termwidth = -1;
470 if (termwidth == -1) {
471 if ((p = getenv("COLUMNS")) && *p != '\0')
472 termwidth = atoi(p);
473 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
474 win.ws_col > 0)
475 termwidth = win.ws_col;
476 else
477 termwidth = 60;
480 for (count = 0; *s; ++s) {
481 if (count + 5 >= termwidth) {
482 (void)printf("\\\n");
483 count = 0;
485 if (isprint((unsigned char)*s) && *s != '\\') {
486 (void)putchar(*s);
487 count++;
488 } else if (*s == '\n') {
489 (void)putchar('$');
490 (void)putchar('\n');
491 count = 0;
492 } else {
493 escapes = "\\\a\b\f\r\t\v";
494 (void)putchar('\\');
495 if ((p = strchr(escapes, *s))) {
496 (void)putchar("\\abfrtv"[p - escapes]);
497 count += 2;
498 } else {
499 (void)printf("%03o", *(u_char *)s);
500 count += 4;
504 (void)putchar('$');
505 (void)putchar('\n');
506 if (ferror(stdout))
507 errx(1, "stdout: %s", strerror(errno ? errno : EIO));
510 static __inline int
511 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
512 size_t slen)
514 int eval;
516 if (preg == NULL) {
517 if (defpreg == NULL)
518 errx(1, "first RE may not be empty");
519 } else
520 defpreg = preg;
522 /* Set anchors */
523 match[0].rm_so = 0;
524 match[0].rm_eo = slen;
526 eval = regexec(defpreg, string,
527 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
528 switch(eval) {
529 case 0:
530 return (1);
531 case REG_NOMATCH:
532 return (0);
534 errx(1, "RE error: %s", strregerror(eval, defpreg));
535 /* NOTREACHED */
539 * regsub - perform substitutions after a regexp match
540 * Based on a routine by Henry Spencer
542 static void
543 regsub(SPACE *sp, char *string, char *src)
545 int len, no;
546 char c, *dst;
548 #define NEEDSP(reqlen) \
549 /* XXX What is the +1 for? */ \
550 if (sp->len + (reqlen) + 1 >= sp->blen) { \
551 sp->blen += (reqlen) + 1024; \
552 if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
553 == NULL) \
554 err(1, "realloc"); \
555 dst = sp->space + sp->len; \
558 dst = sp->space + sp->len;
559 while ((c = *src++) != '\0') {
560 if (c == '&')
561 no = 0;
562 else if (c == '\\' && isdigit((unsigned char)*src))
563 no = *src++ - '0';
564 else
565 no = -1;
566 if (no < 0) { /* Ordinary character. */
567 if (c == '\\' && (*src == '\\' || *src == '&'))
568 c = *src++;
569 NEEDSP(1);
570 *dst++ = c;
571 ++sp->len;
572 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
573 len = match[no].rm_eo - match[no].rm_so;
574 NEEDSP(len);
575 memmove(dst, string + match[no].rm_so, len);
576 dst += len;
577 sp->len += len;
580 NEEDSP(1);
581 *dst = '\0';
585 * aspace --
586 * Append the source space to the destination space, allocating new
587 * space as necessary.
589 void
590 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
592 size_t tlen;
594 /* Make sure SPACE has enough memory and ramp up quickly. */
595 tlen = sp->len + len + 1;
596 if (tlen > sp->blen) {
597 sp->blen = tlen + 1024;
598 if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
599 NULL)
600 err(1, "realloc");
603 if (spflag == REPLACE)
604 sp->len = 0;
606 memmove(sp->space + sp->len, p, len);
608 sp->space[sp->len += len] = '\0';
612 * Close all cached opened files and report any errors
614 void
615 cfclose(struct s_command *cp, struct s_command *end)
618 for (; cp != end; cp = cp->next)
619 switch(cp->code) {
620 case 's':
621 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
622 err(1, "%s", cp->u.s->wfile);
623 cp->u.s->wfd = -1;
624 break;
625 case 'w':
626 if (cp->u.fd != -1 && close(cp->u.fd))
627 err(1, "%s", cp->t);
628 cp->u.fd = -1;
629 break;
630 case '{':
631 cfclose(cp->u.c, cp->next);
632 break;