Change NUL char handling of isspecial()
[git/haiku.git] / grep.c
blobf9a45258aa0485df6f6205aa8121bdad07a480b0
1 #include "cache.h"
2 #include "grep.h"
3 #include "xdiff-interface.h"
5 void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
7 struct grep_pat *p = xcalloc(1, sizeof(*p));
8 p->pattern = pat;
9 p->origin = "header";
10 p->no = 0;
11 p->token = GREP_PATTERN_HEAD;
12 p->field = field;
13 *opt->pattern_tail = p;
14 opt->pattern_tail = &p->next;
15 p->next = NULL;
18 void append_grep_pattern(struct grep_opt *opt, const char *pat,
19 const char *origin, int no, enum grep_pat_token t)
21 struct grep_pat *p = xcalloc(1, sizeof(*p));
22 p->pattern = pat;
23 p->origin = origin;
24 p->no = no;
25 p->token = t;
26 *opt->pattern_tail = p;
27 opt->pattern_tail = &p->next;
28 p->next = NULL;
31 static int isregexspecial(int c)
33 return c == '\0' || is_glob_special(c) ||
34 c == '$' || c == '(' || c == ')' || c == '+' ||
35 c == '.' || c == '^' || c == '{' || c == '|';
38 static int is_fixed(const char *s)
40 while (!isregexspecial(*s))
41 s++;
42 return !*s;
45 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
47 int err;
49 if (opt->fixed || is_fixed(p->pattern))
50 p->fixed = 1;
51 if (opt->regflags & REG_ICASE)
52 p->fixed = 0;
53 if (p->fixed)
54 return;
56 err = regcomp(&p->regexp, p->pattern, opt->regflags);
57 if (err) {
58 char errbuf[1024];
59 char where[1024];
60 if (p->no)
61 sprintf(where, "In '%s' at %d, ",
62 p->origin, p->no);
63 else if (p->origin)
64 sprintf(where, "%s, ", p->origin);
65 else
66 where[0] = 0;
67 regerror(err, &p->regexp, errbuf, 1024);
68 regfree(&p->regexp);
69 die("%s'%s': %s", where, p->pattern, errbuf);
73 static struct grep_expr *compile_pattern_or(struct grep_pat **);
74 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
76 struct grep_pat *p;
77 struct grep_expr *x;
79 p = *list;
80 switch (p->token) {
81 case GREP_PATTERN: /* atom */
82 case GREP_PATTERN_HEAD:
83 case GREP_PATTERN_BODY:
84 x = xcalloc(1, sizeof (struct grep_expr));
85 x->node = GREP_NODE_ATOM;
86 x->u.atom = p;
87 *list = p->next;
88 return x;
89 case GREP_OPEN_PAREN:
90 *list = p->next;
91 x = compile_pattern_or(list);
92 if (!x)
93 return NULL;
94 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
95 die("unmatched parenthesis");
96 *list = (*list)->next;
97 return x;
98 default:
99 return NULL;
103 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
105 struct grep_pat *p;
106 struct grep_expr *x;
108 p = *list;
109 switch (p->token) {
110 case GREP_NOT:
111 if (!p->next)
112 die("--not not followed by pattern expression");
113 *list = p->next;
114 x = xcalloc(1, sizeof (struct grep_expr));
115 x->node = GREP_NODE_NOT;
116 x->u.unary = compile_pattern_not(list);
117 if (!x->u.unary)
118 die("--not followed by non pattern expression");
119 return x;
120 default:
121 return compile_pattern_atom(list);
125 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
127 struct grep_pat *p;
128 struct grep_expr *x, *y, *z;
130 x = compile_pattern_not(list);
131 p = *list;
132 if (p && p->token == GREP_AND) {
133 if (!p->next)
134 die("--and not followed by pattern expression");
135 *list = p->next;
136 y = compile_pattern_and(list);
137 if (!y)
138 die("--and not followed by pattern expression");
139 z = xcalloc(1, sizeof (struct grep_expr));
140 z->node = GREP_NODE_AND;
141 z->u.binary.left = x;
142 z->u.binary.right = y;
143 return z;
145 return x;
148 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
150 struct grep_pat *p;
151 struct grep_expr *x, *y, *z;
153 x = compile_pattern_and(list);
154 p = *list;
155 if (x && p && p->token != GREP_CLOSE_PAREN) {
156 y = compile_pattern_or(list);
157 if (!y)
158 die("not a pattern expression %s", p->pattern);
159 z = xcalloc(1, sizeof (struct grep_expr));
160 z->node = GREP_NODE_OR;
161 z->u.binary.left = x;
162 z->u.binary.right = y;
163 return z;
165 return x;
168 static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
170 return compile_pattern_or(list);
173 void compile_grep_patterns(struct grep_opt *opt)
175 struct grep_pat *p;
177 if (opt->all_match)
178 opt->extended = 1;
180 for (p = opt->pattern_list; p; p = p->next) {
181 switch (p->token) {
182 case GREP_PATTERN: /* atom */
183 case GREP_PATTERN_HEAD:
184 case GREP_PATTERN_BODY:
185 compile_regexp(p, opt);
186 break;
187 default:
188 opt->extended = 1;
189 break;
193 if (!opt->extended)
194 return;
196 /* Then bundle them up in an expression.
197 * A classic recursive descent parser would do.
199 p = opt->pattern_list;
200 opt->pattern_expression = compile_pattern_expr(&p);
201 if (p)
202 die("incomplete pattern expression: %s", p->pattern);
205 static void free_pattern_expr(struct grep_expr *x)
207 switch (x->node) {
208 case GREP_NODE_ATOM:
209 break;
210 case GREP_NODE_NOT:
211 free_pattern_expr(x->u.unary);
212 break;
213 case GREP_NODE_AND:
214 case GREP_NODE_OR:
215 free_pattern_expr(x->u.binary.left);
216 free_pattern_expr(x->u.binary.right);
217 break;
219 free(x);
222 void free_grep_patterns(struct grep_opt *opt)
224 struct grep_pat *p, *n;
226 for (p = opt->pattern_list; p; p = n) {
227 n = p->next;
228 switch (p->token) {
229 case GREP_PATTERN: /* atom */
230 case GREP_PATTERN_HEAD:
231 case GREP_PATTERN_BODY:
232 regfree(&p->regexp);
233 break;
234 default:
235 break;
237 free(p);
240 if (!opt->extended)
241 return;
242 free_pattern_expr(opt->pattern_expression);
245 static char *end_of_line(char *cp, unsigned long *left)
247 unsigned long l = *left;
248 while (l && *cp != '\n') {
249 l--;
250 cp++;
252 *left = l;
253 return cp;
256 static int word_char(char ch)
258 return isalnum(ch) || ch == '_';
261 static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
262 const char *name, unsigned lno, char sign)
264 if (opt->null_following_name)
265 sign = '\0';
266 if (opt->pathname)
267 printf("%s%c", name, sign);
268 if (opt->linenum)
269 printf("%d%c", lno, sign);
270 printf("%.*s\n", (int)(eol-bol), bol);
273 static void show_name(struct grep_opt *opt, const char *name)
275 printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
278 static int fixmatch(const char *pattern, char *line, regmatch_t *match)
280 char *hit = strstr(line, pattern);
281 if (!hit) {
282 match->rm_so = match->rm_eo = -1;
283 return REG_NOMATCH;
285 else {
286 match->rm_so = hit - line;
287 match->rm_eo = match->rm_so + strlen(pattern);
288 return 0;
292 static int strip_timestamp(char *bol, char **eol_p)
294 char *eol = *eol_p;
295 int ch;
297 while (bol < --eol) {
298 if (*eol != '>')
299 continue;
300 *eol_p = ++eol;
301 ch = *eol;
302 *eol = '\0';
303 return ch;
305 return 0;
308 static struct {
309 const char *field;
310 size_t len;
311 } header_field[] = {
312 { "author ", 7 },
313 { "committer ", 10 },
316 static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
318 int hit = 0;
319 int saved_ch = 0;
320 regmatch_t pmatch[10];
322 if ((p->token != GREP_PATTERN) &&
323 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
324 return 0;
326 if (p->token == GREP_PATTERN_HEAD) {
327 const char *field;
328 size_t len;
329 assert(p->field < ARRAY_SIZE(header_field));
330 field = header_field[p->field].field;
331 len = header_field[p->field].len;
332 if (strncmp(bol, field, len))
333 return 0;
334 bol += len;
335 saved_ch = strip_timestamp(bol, &eol);
338 again:
339 if (!p->fixed) {
340 regex_t *exp = &p->regexp;
341 hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
342 pmatch, 0);
344 else {
345 hit = !fixmatch(p->pattern, bol, pmatch);
348 if (hit && opt->word_regexp) {
349 if ((pmatch[0].rm_so < 0) ||
350 (eol - bol) <= pmatch[0].rm_so ||
351 (pmatch[0].rm_eo < 0) ||
352 (eol - bol) < pmatch[0].rm_eo)
353 die("regexp returned nonsense");
355 /* Match beginning must be either beginning of the
356 * line, or at word boundary (i.e. the last char must
357 * not be a word char). Similarly, match end must be
358 * either end of the line, or at word boundary
359 * (i.e. the next char must not be a word char).
361 if ( ((pmatch[0].rm_so == 0) ||
362 !word_char(bol[pmatch[0].rm_so-1])) &&
363 ((pmatch[0].rm_eo == (eol-bol)) ||
364 !word_char(bol[pmatch[0].rm_eo])) )
366 else
367 hit = 0;
369 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
370 /* There could be more than one match on the
371 * line, and the first match might not be
372 * strict word match. But later ones could be!
373 * Forward to the next possible start, i.e. the
374 * next position following a non-word char.
376 bol = pmatch[0].rm_so + bol + 1;
377 while (word_char(bol[-1]) && bol < eol)
378 bol++;
379 if (bol < eol)
380 goto again;
383 if (p->token == GREP_PATTERN_HEAD && saved_ch)
384 *eol = saved_ch;
385 return hit;
388 static int match_expr_eval(struct grep_opt *o,
389 struct grep_expr *x,
390 char *bol, char *eol,
391 enum grep_context ctx,
392 int collect_hits)
394 int h = 0;
396 switch (x->node) {
397 case GREP_NODE_ATOM:
398 h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
399 break;
400 case GREP_NODE_NOT:
401 h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
402 break;
403 case GREP_NODE_AND:
404 if (!collect_hits)
405 return (match_expr_eval(o, x->u.binary.left,
406 bol, eol, ctx, 0) &&
407 match_expr_eval(o, x->u.binary.right,
408 bol, eol, ctx, 0));
409 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
410 h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
411 break;
412 case GREP_NODE_OR:
413 if (!collect_hits)
414 return (match_expr_eval(o, x->u.binary.left,
415 bol, eol, ctx, 0) ||
416 match_expr_eval(o, x->u.binary.right,
417 bol, eol, ctx, 0));
418 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
419 x->u.binary.left->hit |= h;
420 h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
421 break;
422 default:
423 die("Unexpected node type (internal error) %d", x->node);
425 if (collect_hits)
426 x->hit |= h;
427 return h;
430 static int match_expr(struct grep_opt *opt, char *bol, char *eol,
431 enum grep_context ctx, int collect_hits)
433 struct grep_expr *x = opt->pattern_expression;
434 return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
437 static int match_line(struct grep_opt *opt, char *bol, char *eol,
438 enum grep_context ctx, int collect_hits)
440 struct grep_pat *p;
441 if (opt->extended)
442 return match_expr(opt, bol, eol, ctx, collect_hits);
444 /* we do not call with collect_hits without being extended */
445 for (p = opt->pattern_list; p; p = p->next) {
446 if (match_one_pattern(opt, p, bol, eol, ctx))
447 return 1;
449 return 0;
452 static int grep_buffer_1(struct grep_opt *opt, const char *name,
453 char *buf, unsigned long size, int collect_hits)
455 char *bol = buf;
456 unsigned long left = size;
457 unsigned lno = 1;
458 struct pre_context_line {
459 char *bol;
460 char *eol;
461 } *prev = NULL, *pcl;
462 unsigned last_hit = 0;
463 unsigned last_shown = 0;
464 int binary_match_only = 0;
465 const char *hunk_mark = "";
466 unsigned count = 0;
467 enum grep_context ctx = GREP_CONTEXT_HEAD;
469 if (buffer_is_binary(buf, size)) {
470 switch (opt->binary) {
471 case GREP_BINARY_DEFAULT:
472 binary_match_only = 1;
473 break;
474 case GREP_BINARY_NOMATCH:
475 return 0; /* Assume unmatch */
476 break;
477 default:
478 break;
482 if (opt->pre_context)
483 prev = xcalloc(opt->pre_context, sizeof(*prev));
484 if (opt->pre_context || opt->post_context)
485 hunk_mark = "--\n";
487 while (left) {
488 char *eol, ch;
489 int hit;
491 eol = end_of_line(bol, &left);
492 ch = *eol;
493 *eol = 0;
495 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
496 ctx = GREP_CONTEXT_BODY;
498 hit = match_line(opt, bol, eol, ctx, collect_hits);
499 *eol = ch;
501 if (collect_hits)
502 goto next_line;
504 /* "grep -v -e foo -e bla" should list lines
505 * that do not have either, so inversion should
506 * be done outside.
508 if (opt->invert)
509 hit = !hit;
510 if (opt->unmatch_name_only) {
511 if (hit)
512 return 0;
513 goto next_line;
515 if (hit) {
516 count++;
517 if (opt->status_only)
518 return 1;
519 if (binary_match_only) {
520 printf("Binary file %s matches\n", name);
521 return 1;
523 if (opt->name_only) {
524 show_name(opt, name);
525 return 1;
527 /* Hit at this line. If we haven't shown the
528 * pre-context lines, we would need to show them.
529 * When asked to do "count", this still show
530 * the context which is nonsense, but the user
531 * deserves to get that ;-).
533 if (opt->pre_context) {
534 unsigned from;
535 if (opt->pre_context < lno)
536 from = lno - opt->pre_context;
537 else
538 from = 1;
539 if (from <= last_shown)
540 from = last_shown + 1;
541 if (last_shown && from != last_shown + 1)
542 fputs(hunk_mark, stdout);
543 while (from < lno) {
544 pcl = &prev[lno-from-1];
545 show_line(opt, pcl->bol, pcl->eol,
546 name, from, '-');
547 from++;
549 last_shown = lno-1;
551 if (last_shown && lno != last_shown + 1)
552 fputs(hunk_mark, stdout);
553 if (!opt->count)
554 show_line(opt, bol, eol, name, lno, ':');
555 last_shown = last_hit = lno;
557 else if (last_hit &&
558 lno <= last_hit + opt->post_context) {
559 /* If the last hit is within the post context,
560 * we need to show this line.
562 if (last_shown && lno != last_shown + 1)
563 fputs(hunk_mark, stdout);
564 show_line(opt, bol, eol, name, lno, '-');
565 last_shown = lno;
567 if (opt->pre_context) {
568 memmove(prev+1, prev,
569 (opt->pre_context-1) * sizeof(*prev));
570 prev->bol = bol;
571 prev->eol = eol;
574 next_line:
575 bol = eol + 1;
576 if (!left)
577 break;
578 left--;
579 lno++;
582 free(prev);
583 if (collect_hits)
584 return 0;
586 if (opt->status_only)
587 return 0;
588 if (opt->unmatch_name_only) {
589 /* We did not see any hit, so we want to show this */
590 show_name(opt, name);
591 return 1;
594 /* NEEDSWORK:
595 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
596 * which feels mostly useless but sometimes useful. Maybe
597 * make it another option? For now suppress them.
599 if (opt->count && count)
600 printf("%s%c%u\n", name,
601 opt->null_following_name ? '\0' : ':', count);
602 return !!last_hit;
605 static void clr_hit_marker(struct grep_expr *x)
607 /* All-hit markers are meaningful only at the very top level
608 * OR node.
610 while (1) {
611 x->hit = 0;
612 if (x->node != GREP_NODE_OR)
613 return;
614 x->u.binary.left->hit = 0;
615 x = x->u.binary.right;
619 static int chk_hit_marker(struct grep_expr *x)
621 /* Top level nodes have hit markers. See if they all are hits */
622 while (1) {
623 if (x->node != GREP_NODE_OR)
624 return x->hit;
625 if (!x->u.binary.left->hit)
626 return 0;
627 x = x->u.binary.right;
631 int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
634 * we do not have to do the two-pass grep when we do not check
635 * buffer-wide "all-match".
637 if (!opt->all_match)
638 return grep_buffer_1(opt, name, buf, size, 0);
640 /* Otherwise the toplevel "or" terms hit a bit differently.
641 * We first clear hit markers from them.
643 clr_hit_marker(opt->pattern_expression);
644 grep_buffer_1(opt, name, buf, size, 1);
646 if (!chk_hit_marker(opt->pattern_expression))
647 return 0;
649 return grep_buffer_1(opt, name, buf, size, 0);