grep: support newline separated pattern list
[alt-git.git] / grep.c
blob02258039d919d62f0ad08eb5933765c7f9a5d602
1 #include "cache.h"
2 #include "grep.h"
3 #include "userdiff.h"
4 #include "xdiff-interface.h"
6 static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
7 const char *origin, int no,
8 enum grep_pat_token t,
9 enum grep_header_field field)
11 struct grep_pat *p = xcalloc(1, sizeof(*p));
12 p->pattern = xmemdupz(pat, patlen);
13 p->patternlen = patlen;
14 p->origin = origin;
15 p->no = no;
16 p->token = t;
17 p->field = field;
18 return p;
21 static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
23 **tail = p;
24 *tail = &p->next;
25 p->next = NULL;
27 switch (p->token) {
28 case GREP_PATTERN: /* atom */
29 case GREP_PATTERN_HEAD:
30 case GREP_PATTERN_BODY:
31 for (;;) {
32 struct grep_pat *new_pat;
33 size_t len = 0;
34 char *cp = p->pattern + p->patternlen, *nl = NULL;
35 while (++len <= p->patternlen) {
36 if (*(--cp) == '\n') {
37 nl = cp;
38 break;
41 if (!nl)
42 break;
43 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
44 p->no, p->token, p->field);
45 new_pat->next = p->next;
46 if (!p->next)
47 *tail = &new_pat->next;
48 p->next = new_pat;
49 *nl = '\0';
50 p->patternlen -= len;
52 break;
53 default:
54 break;
58 void append_header_grep_pattern(struct grep_opt *opt,
59 enum grep_header_field field, const char *pat)
61 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
62 GREP_PATTERN_HEAD, field);
63 do_append_grep_pat(&opt->header_tail, p);
66 void append_grep_pattern(struct grep_opt *opt, const char *pat,
67 const char *origin, int no, enum grep_pat_token t)
69 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
72 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
73 const char *origin, int no, enum grep_pat_token t)
75 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
76 do_append_grep_pat(&opt->pattern_tail, p);
79 struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
81 struct grep_pat *pat;
82 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
83 *ret = *opt;
85 ret->pattern_list = NULL;
86 ret->pattern_tail = &ret->pattern_list;
88 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
90 if(pat->token == GREP_PATTERN_HEAD)
91 append_header_grep_pattern(ret, pat->field,
92 pat->pattern);
93 else
94 append_grep_pat(ret, pat->pattern, pat->patternlen,
95 pat->origin, pat->no, pat->token);
98 return ret;
101 static NORETURN void compile_regexp_failed(const struct grep_pat *p,
102 const char *error)
104 char where[1024];
106 if (p->no)
107 sprintf(where, "In '%s' at %d, ", p->origin, p->no);
108 else if (p->origin)
109 sprintf(where, "%s, ", p->origin);
110 else
111 where[0] = 0;
113 die("%s'%s': %s", where, p->pattern, error);
116 #ifdef USE_LIBPCRE
117 static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
119 const char *error;
120 int erroffset;
121 int options = 0;
123 if (opt->ignore_case)
124 options |= PCRE_CASELESS;
126 p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
127 NULL);
128 if (!p->pcre_regexp)
129 compile_regexp_failed(p, error);
131 p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
132 if (!p->pcre_extra_info && error)
133 die("%s", error);
136 static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
137 regmatch_t *match, int eflags)
139 int ovector[30], ret, flags = 0;
141 if (eflags & REG_NOTBOL)
142 flags |= PCRE_NOTBOL;
144 ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
145 0, flags, ovector, ARRAY_SIZE(ovector));
146 if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
147 die("pcre_exec failed with error code %d", ret);
148 if (ret > 0) {
149 ret = 0;
150 match->rm_so = ovector[0];
151 match->rm_eo = ovector[1];
154 return ret;
157 static void free_pcre_regexp(struct grep_pat *p)
159 pcre_free(p->pcre_regexp);
160 pcre_free(p->pcre_extra_info);
162 #else /* !USE_LIBPCRE */
/* Stub for builds without USE_LIBPCRE: requesting a PCRE pattern is fatal. */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
/*
 * Stub for builds without USE_LIBPCRE: always returns 1, which callers
 * in this file treat as "no match" (a zero return means a hit).
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
		     regmatch_t *match, int eflags)
{
	return 1;
}
/* Stub for builds without USE_LIBPCRE: there is nothing to release. */
static void free_pcre_regexp(struct grep_pat *p)
{
}
177 #endif /* !USE_LIBPCRE */
179 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
181 int err;
183 p->word_regexp = opt->word_regexp;
184 p->ignore_case = opt->ignore_case;
185 p->fixed = opt->fixed;
187 if (p->fixed)
188 return;
190 if (opt->pcre) {
191 compile_pcre_regexp(p, opt);
192 return;
195 err = regcomp(&p->regexp, p->pattern, opt->regflags);
196 if (err) {
197 char errbuf[1024];
198 regerror(err, &p->regexp, errbuf, 1024);
199 regfree(&p->regexp);
200 compile_regexp_failed(p, errbuf);
204 static struct grep_expr *compile_pattern_or(struct grep_pat **);
205 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
207 struct grep_pat *p;
208 struct grep_expr *x;
210 p = *list;
211 if (!p)
212 return NULL;
213 switch (p->token) {
214 case GREP_PATTERN: /* atom */
215 case GREP_PATTERN_HEAD:
216 case GREP_PATTERN_BODY:
217 x = xcalloc(1, sizeof (struct grep_expr));
218 x->node = GREP_NODE_ATOM;
219 x->u.atom = p;
220 *list = p->next;
221 return x;
222 case GREP_OPEN_PAREN:
223 *list = p->next;
224 x = compile_pattern_or(list);
225 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
226 die("unmatched parenthesis");
227 *list = (*list)->next;
228 return x;
229 default:
230 return NULL;
234 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
236 struct grep_pat *p;
237 struct grep_expr *x;
239 p = *list;
240 if (!p)
241 return NULL;
242 switch (p->token) {
243 case GREP_NOT:
244 if (!p->next)
245 die("--not not followed by pattern expression");
246 *list = p->next;
247 x = xcalloc(1, sizeof (struct grep_expr));
248 x->node = GREP_NODE_NOT;
249 x->u.unary = compile_pattern_not(list);
250 if (!x->u.unary)
251 die("--not followed by non pattern expression");
252 return x;
253 default:
254 return compile_pattern_atom(list);
258 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
260 struct grep_pat *p;
261 struct grep_expr *x, *y, *z;
263 x = compile_pattern_not(list);
264 p = *list;
265 if (p && p->token == GREP_AND) {
266 if (!p->next)
267 die("--and not followed by pattern expression");
268 *list = p->next;
269 y = compile_pattern_and(list);
270 if (!y)
271 die("--and not followed by pattern expression");
272 z = xcalloc(1, sizeof (struct grep_expr));
273 z->node = GREP_NODE_AND;
274 z->u.binary.left = x;
275 z->u.binary.right = y;
276 return z;
278 return x;
281 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
283 struct grep_pat *p;
284 struct grep_expr *x, *y, *z;
286 x = compile_pattern_and(list);
287 p = *list;
288 if (x && p && p->token != GREP_CLOSE_PAREN) {
289 y = compile_pattern_or(list);
290 if (!y)
291 die("not a pattern expression %s", p->pattern);
292 z = xcalloc(1, sizeof (struct grep_expr));
293 z->node = GREP_NODE_OR;
294 z->u.binary.left = x;
295 z->u.binary.right = y;
296 return z;
298 return x;
/* Entry point of the expression parser: a full expression is an OR-expression. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
306 static struct grep_expr *grep_true_expr(void)
308 struct grep_expr *z = xcalloc(1, sizeof(*z));
309 z->node = GREP_NODE_TRUE;
310 return z;
313 static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
315 struct grep_expr *z = xcalloc(1, sizeof(*z));
316 z->node = GREP_NODE_OR;
317 z->u.binary.left = left;
318 z->u.binary.right = right;
319 return z;
322 static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
324 struct grep_pat *p;
325 struct grep_expr *header_expr;
326 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
327 enum grep_header_field fld;
329 if (!opt->header_list)
330 return NULL;
331 p = opt->header_list;
332 for (p = opt->header_list; p; p = p->next) {
333 if (p->token != GREP_PATTERN_HEAD)
334 die("bug: a non-header pattern in grep header list.");
335 if (p->field < 0 || GREP_HEADER_FIELD_MAX <= p->field)
336 die("bug: unknown header field %d", p->field);
337 compile_regexp(p, opt);
340 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
341 header_group[fld] = NULL;
343 for (p = opt->header_list; p; p = p->next) {
344 struct grep_expr *h;
345 struct grep_pat *pp = p;
347 h = compile_pattern_atom(&pp);
348 if (!h || pp != p->next)
349 die("bug: malformed header expr");
350 if (!header_group[p->field]) {
351 header_group[p->field] = h;
352 continue;
354 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
357 header_expr = NULL;
359 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
360 if (!header_group[fld])
361 continue;
362 if (!header_expr)
363 header_expr = grep_true_expr();
364 header_expr = grep_or_expr(header_group[fld], header_expr);
366 return header_expr;
369 void compile_grep_patterns(struct grep_opt *opt)
371 struct grep_pat *p;
372 struct grep_expr *header_expr = prep_header_patterns(opt);
374 for (p = opt->pattern_list; p; p = p->next) {
375 switch (p->token) {
376 case GREP_PATTERN: /* atom */
377 case GREP_PATTERN_HEAD:
378 case GREP_PATTERN_BODY:
379 compile_regexp(p, opt);
380 break;
381 default:
382 opt->extended = 1;
383 break;
387 if (opt->all_match || header_expr)
388 opt->extended = 1;
389 else if (!opt->extended)
390 return;
392 p = opt->pattern_list;
393 if (p)
394 opt->pattern_expression = compile_pattern_expr(&p);
395 if (p)
396 die("incomplete pattern expression: %s", p->pattern);
398 if (!header_expr)
399 return;
401 if (!opt->pattern_expression)
402 opt->pattern_expression = header_expr;
403 else
404 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
405 header_expr);
406 opt->all_match = 1;
409 static void free_pattern_expr(struct grep_expr *x)
411 switch (x->node) {
412 case GREP_NODE_TRUE:
413 case GREP_NODE_ATOM:
414 break;
415 case GREP_NODE_NOT:
416 free_pattern_expr(x->u.unary);
417 break;
418 case GREP_NODE_AND:
419 case GREP_NODE_OR:
420 free_pattern_expr(x->u.binary.left);
421 free_pattern_expr(x->u.binary.right);
422 break;
424 free(x);
427 void free_grep_patterns(struct grep_opt *opt)
429 struct grep_pat *p, *n;
431 for (p = opt->pattern_list; p; p = n) {
432 n = p->next;
433 switch (p->token) {
434 case GREP_PATTERN: /* atom */
435 case GREP_PATTERN_HEAD:
436 case GREP_PATTERN_BODY:
437 if (p->pcre_regexp)
438 free_pcre_regexp(p);
439 else
440 regfree(&p->regexp);
441 free(p->pattern);
442 break;
443 default:
444 break;
446 free(p);
449 if (!opt->extended)
450 return;
451 free_pattern_expr(opt->pattern_expression);
/*
 * Advance cp to the next newline, looking at no more than *left bytes.
 *
 * Returns a pointer to the newline, or to the byte just past the
 * examined range when none was found.  *left is reduced by the number of
 * bytes skipped (the newline itself is not counted as consumed).
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
/*
 * Return 1 if ch can be part of a word (alphanumeric or underscore),
 * 0 otherwise.
 *
 * The argument is converted to unsigned char before classification:
 * with the standard <ctype.h>, passing a negative char value is
 * undefined.  (If the project overrides isalnum() with its own macro the
 * cast is simply redundant.)
 */
static int word_char(char ch)
{
	return isalnum((unsigned char)ch) || ch == '_';
}
470 static void output_color(struct grep_opt *opt, const void *data, size_t size,
471 const char *color)
473 if (opt->color && color && color[0]) {
474 opt->output(opt, color, strlen(color));
475 opt->output(opt, data, size);
476 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
477 } else
478 opt->output(opt, data, size);
481 static void output_sep(struct grep_opt *opt, char sign)
483 if (opt->null_following_name)
484 opt->output(opt, "\0", 1);
485 else
486 output_color(opt, &sign, 1, opt->color_sep);
489 static void show_name(struct grep_opt *opt, const char *name)
491 output_color(opt, name, strlen(name), opt->color_filename);
492 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
495 static int fixmatch(struct grep_pat *p, char *line, char *eol,
496 regmatch_t *match)
498 char *hit;
500 if (p->ignore_case) {
501 char *s = line;
502 do {
503 hit = strcasestr(s, p->pattern);
504 if (hit)
505 break;
506 s += strlen(s) + 1;
507 } while (s < eol);
508 } else
509 hit = memmem(line, eol - line, p->pattern, p->patternlen);
511 if (!hit) {
512 match->rm_so = match->rm_eo = -1;
513 return REG_NOMATCH;
515 else {
516 match->rm_so = hit - line;
517 match->rm_eo = match->rm_so + p->patternlen;
518 return 0;
/*
 * Run regexec() for preg on line, reporting the first match in *match.
 *
 * Where the platform provides REG_STARTEND, the search is limited to
 * [line, eol) by passing the range in *match; otherwise regexec() reads
 * up to the terminating NUL.  Returns the regexec() result.
 */
static int regmatch(const regex_t *preg, char *line, char *eol,
		    regmatch_t *match, int eflags)
{
#ifdef REG_STARTEND
	eflags |= REG_STARTEND;
	match->rm_eo = eol - line;
	match->rm_so = 0;
#endif
	return regexec(preg, line, 1, match, eflags);
}
533 static int patmatch(struct grep_pat *p, char *line, char *eol,
534 regmatch_t *match, int eflags)
536 int hit;
538 if (p->fixed)
539 hit = !fixmatch(p, line, eol, match);
540 else if (p->pcre_regexp)
541 hit = !pcrematch(p, line, eol, match, eflags);
542 else
543 hit = !regmatch(&p->regexp, line, eol, match, eflags);
545 return hit;
/*
 * Cut the line off right after the last '>' found strictly after bol.
 *
 * On success the byte following that '>' is overwritten with NUL,
 * *eol_p is updated to point at it, and the overwritten byte is returned
 * so the caller can restore it.  Returns 0 and changes nothing when no
 * '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
	char *cur = *eol_p;

	/* Walk backwards from the byte before the end towards bol. */
	for (--cur; bol < cur; --cur) {
		int saved;

		if (*cur != '>')
			continue;
		++cur;
		*eol_p = cur;
		saved = *cur;
		*cur = '\0';
		return saved;
	}
	return 0;
}
/*
 * Line prefixes that introduce each header field, indexed by the pattern's
 * header field value, together with their lengths.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ", 7 },
	{ "committer ", 10 },
};
572 static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
573 enum grep_context ctx,
574 regmatch_t *pmatch, int eflags)
576 int hit = 0;
577 int saved_ch = 0;
578 const char *start = bol;
580 if ((p->token != GREP_PATTERN) &&
581 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
582 return 0;
584 if (p->token == GREP_PATTERN_HEAD) {
585 const char *field;
586 size_t len;
587 assert(p->field < ARRAY_SIZE(header_field));
588 field = header_field[p->field].field;
589 len = header_field[p->field].len;
590 if (strncmp(bol, field, len))
591 return 0;
592 bol += len;
593 saved_ch = strip_timestamp(bol, &eol);
596 again:
597 hit = patmatch(p, bol, eol, pmatch, eflags);
599 if (hit && p->word_regexp) {
600 if ((pmatch[0].rm_so < 0) ||
601 (eol - bol) < pmatch[0].rm_so ||
602 (pmatch[0].rm_eo < 0) ||
603 (eol - bol) < pmatch[0].rm_eo)
604 die("regexp returned nonsense");
606 /* Match beginning must be either beginning of the
607 * line, or at word boundary (i.e. the last char must
608 * not be a word char). Similarly, match end must be
609 * either end of the line, or at word boundary
610 * (i.e. the next char must not be a word char).
612 if ( ((pmatch[0].rm_so == 0) ||
613 !word_char(bol[pmatch[0].rm_so-1])) &&
614 ((pmatch[0].rm_eo == (eol-bol)) ||
615 !word_char(bol[pmatch[0].rm_eo])) )
617 else
618 hit = 0;
620 /* Words consist of at least one character. */
621 if (pmatch->rm_so == pmatch->rm_eo)
622 hit = 0;
624 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
625 /* There could be more than one match on the
626 * line, and the first match might not be
627 * strict word match. But later ones could be!
628 * Forward to the next possible start, i.e. the
629 * next position following a non-word char.
631 bol = pmatch[0].rm_so + bol + 1;
632 while (word_char(bol[-1]) && bol < eol)
633 bol++;
634 eflags |= REG_NOTBOL;
635 if (bol < eol)
636 goto again;
639 if (p->token == GREP_PATTERN_HEAD && saved_ch)
640 *eol = saved_ch;
641 if (hit) {
642 pmatch[0].rm_so += bol - start;
643 pmatch[0].rm_eo += bol - start;
645 return hit;
648 static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
649 enum grep_context ctx, int collect_hits)
651 int h = 0;
652 regmatch_t match;
654 if (!x)
655 die("Not a valid grep expression");
656 switch (x->node) {
657 case GREP_NODE_TRUE:
658 h = 1;
659 break;
660 case GREP_NODE_ATOM:
661 h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
662 break;
663 case GREP_NODE_NOT:
664 h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
665 break;
666 case GREP_NODE_AND:
667 if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
668 return 0;
669 h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
670 break;
671 case GREP_NODE_OR:
672 if (!collect_hits)
673 return (match_expr_eval(x->u.binary.left,
674 bol, eol, ctx, 0) ||
675 match_expr_eval(x->u.binary.right,
676 bol, eol, ctx, 0));
677 h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
678 x->u.binary.left->hit |= h;
679 h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
680 break;
681 default:
682 die("Unexpected node type (internal error) %d", x->node);
684 if (collect_hits)
685 x->hit |= h;
686 return h;
689 static int match_expr(struct grep_opt *opt, char *bol, char *eol,
690 enum grep_context ctx, int collect_hits)
692 struct grep_expr *x = opt->pattern_expression;
693 return match_expr_eval(x, bol, eol, ctx, collect_hits);
696 static int match_line(struct grep_opt *opt, char *bol, char *eol,
697 enum grep_context ctx, int collect_hits)
699 struct grep_pat *p;
700 regmatch_t match;
702 if (opt->extended)
703 return match_expr(opt, bol, eol, ctx, collect_hits);
705 /* we do not call with collect_hits without being extended */
706 for (p = opt->pattern_list; p; p = p->next) {
707 if (match_one_pattern(p, bol, eol, ctx, &match, 0))
708 return 1;
710 return 0;
713 static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
714 enum grep_context ctx,
715 regmatch_t *pmatch, int eflags)
717 regmatch_t match;
719 if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
720 return 0;
721 if (match.rm_so < 0 || match.rm_eo < 0)
722 return 0;
723 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
724 if (match.rm_so > pmatch->rm_so)
725 return 1;
726 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
727 return 1;
729 pmatch->rm_so = match.rm_so;
730 pmatch->rm_eo = match.rm_eo;
731 return 1;
734 static int next_match(struct grep_opt *opt, char *bol, char *eol,
735 enum grep_context ctx, regmatch_t *pmatch, int eflags)
737 struct grep_pat *p;
738 int hit = 0;
740 pmatch->rm_so = pmatch->rm_eo = -1;
741 if (bol < eol) {
742 for (p = opt->pattern_list; p; p = p->next) {
743 switch (p->token) {
744 case GREP_PATTERN: /* atom */
745 case GREP_PATTERN_HEAD:
746 case GREP_PATTERN_BODY:
747 hit |= match_next_pattern(p, bol, eol, ctx,
748 pmatch, eflags);
749 break;
750 default:
751 break;
755 return hit;
758 static void show_line(struct grep_opt *opt, char *bol, char *eol,
759 const char *name, unsigned lno, char sign)
761 int rest = eol - bol;
762 char *line_color = NULL;
764 if (opt->pre_context || opt->post_context) {
765 if (opt->last_shown == 0) {
766 if (opt->show_hunk_mark) {
767 output_color(opt, "--", 2, opt->color_sep);
768 opt->output(opt, "\n", 1);
770 } else if (lno > opt->last_shown + 1) {
771 output_color(opt, "--", 2, opt->color_sep);
772 opt->output(opt, "\n", 1);
775 opt->last_shown = lno;
777 if (opt->pathname) {
778 output_color(opt, name, strlen(name), opt->color_filename);
779 output_sep(opt, sign);
781 if (opt->linenum) {
782 char buf[32];
783 snprintf(buf, sizeof(buf), "%d", lno);
784 output_color(opt, buf, strlen(buf), opt->color_lineno);
785 output_sep(opt, sign);
787 if (opt->color) {
788 regmatch_t match;
789 enum grep_context ctx = GREP_CONTEXT_BODY;
790 int ch = *eol;
791 int eflags = 0;
793 if (sign == ':')
794 line_color = opt->color_selected;
795 else if (sign == '-')
796 line_color = opt->color_context;
797 else if (sign == '=')
798 line_color = opt->color_function;
799 *eol = '\0';
800 while (next_match(opt, bol, eol, ctx, &match, eflags)) {
801 if (match.rm_so == match.rm_eo)
802 break;
804 output_color(opt, bol, match.rm_so, line_color);
805 output_color(opt, bol + match.rm_so,
806 match.rm_eo - match.rm_so,
807 opt->color_match);
808 bol += match.rm_eo;
809 rest -= match.rm_eo;
810 eflags = REG_NOTBOL;
812 *eol = ch;
814 output_color(opt, bol, rest, line_color);
815 opt->output(opt, "\n", 1);
818 static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
820 xdemitconf_t *xecfg = opt->priv;
821 if (xecfg && xecfg->find_func) {
822 char buf[1];
823 return xecfg->find_func(bol, eol - bol, buf, 1,
824 xecfg->find_func_priv) >= 0;
827 if (bol == eol)
828 return 0;
829 if (isalpha(*bol) || *bol == '_' || *bol == '$')
830 return 1;
831 return 0;
834 static void show_funcname_line(struct grep_opt *opt, const char *name,
835 char *buf, char *bol, unsigned lno)
837 while (bol > buf) {
838 char *eol = --bol;
840 while (bol > buf && bol[-1] != '\n')
841 bol--;
842 lno--;
844 if (lno <= opt->last_shown)
845 break;
847 if (match_funcname(opt, bol, eol)) {
848 show_line(opt, bol, eol, name, lno, '=');
849 break;
854 static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
855 char *bol, unsigned lno)
857 unsigned cur = lno, from = 1, funcname_lno = 0;
858 int funcname_needed = opt->funcname;
860 if (opt->pre_context < lno)
861 from = lno - opt->pre_context;
862 if (from <= opt->last_shown)
863 from = opt->last_shown + 1;
865 /* Rewind. */
866 while (bol > buf && cur > from) {
867 char *eol = --bol;
869 while (bol > buf && bol[-1] != '\n')
870 bol--;
871 cur--;
872 if (funcname_needed && match_funcname(opt, bol, eol)) {
873 funcname_lno = cur;
874 funcname_needed = 0;
878 /* We need to look even further back to find a function signature. */
879 if (opt->funcname && funcname_needed)
880 show_funcname_line(opt, name, buf, bol, cur);
882 /* Back forward. */
883 while (cur < lno) {
884 char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
886 while (*eol != '\n')
887 eol++;
888 show_line(opt, bol, eol, name, cur, sign);
889 bol = eol + 1;
890 cur++;
894 static int should_lookahead(struct grep_opt *opt)
896 struct grep_pat *p;
898 if (opt->extended)
899 return 0; /* punt for too complex stuff */
900 if (opt->invert)
901 return 0;
902 for (p = opt->pattern_list; p; p = p->next) {
903 if (p->token != GREP_PATTERN)
904 return 0; /* punt for "header only" and stuff */
906 return 1;
909 static int look_ahead(struct grep_opt *opt,
910 unsigned long *left_p,
911 unsigned *lno_p,
912 char **bol_p)
914 unsigned lno = *lno_p;
915 char *bol = *bol_p;
916 struct grep_pat *p;
917 char *sp, *last_bol;
918 regoff_t earliest = -1;
920 for (p = opt->pattern_list; p; p = p->next) {
921 int hit;
922 regmatch_t m;
924 hit = patmatch(p, bol, bol + *left_p, &m, 0);
925 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
926 continue;
927 if (earliest < 0 || m.rm_so < earliest)
928 earliest = m.rm_so;
931 if (earliest < 0) {
932 *bol_p = bol + *left_p;
933 *left_p = 0;
934 return 1;
936 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
937 ; /* find the beginning of the line */
938 last_bol = sp;
940 for (sp = bol; sp < last_bol; sp++) {
941 if (*sp == '\n')
942 lno++;
944 *left_p -= last_bol - bol;
945 *bol_p = last_bol;
946 *lno_p = lno;
947 return 0;
950 int grep_threads_ok(const struct grep_opt *opt)
952 /* If this condition is true, then we may use the attribute
953 * machinery in grep_buffer_1. The attribute code is not
954 * thread safe, so we disable the use of threads.
956 if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
957 !opt->name_only)
958 return 0;
960 return 1;
/* Default output callback: write the bytes to stdout (errors are ignored). */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	fwrite(buf, size, 1, stdout);
}
968 static int grep_buffer_1(struct grep_opt *opt, const char *name,
969 char *buf, unsigned long size, int collect_hits)
971 char *bol = buf;
972 unsigned long left = size;
973 unsigned lno = 1;
974 unsigned last_hit = 0;
975 int binary_match_only = 0;
976 unsigned count = 0;
977 int try_lookahead = 0;
978 enum grep_context ctx = GREP_CONTEXT_HEAD;
979 xdemitconf_t xecfg;
981 if (!opt->output)
982 opt->output = std_output;
984 if (opt->last_shown && (opt->pre_context || opt->post_context) &&
985 opt->output == std_output)
986 opt->show_hunk_mark = 1;
987 opt->last_shown = 0;
989 switch (opt->binary) {
990 case GREP_BINARY_DEFAULT:
991 if (buffer_is_binary(buf, size))
992 binary_match_only = 1;
993 break;
994 case GREP_BINARY_NOMATCH:
995 if (buffer_is_binary(buf, size))
996 return 0; /* Assume unmatch */
997 break;
998 case GREP_BINARY_TEXT:
999 break;
1000 default:
1001 die("bug: unknown binary handling mode");
1004 memset(&xecfg, 0, sizeof(xecfg));
1005 if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
1006 !opt->name_only && !binary_match_only && !collect_hits) {
1007 struct userdiff_driver *drv = userdiff_find_by_path(name);
1008 if (drv && drv->funcname.pattern) {
1009 const struct userdiff_funcname *pe = &drv->funcname;
1010 xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
1011 opt->priv = &xecfg;
1014 try_lookahead = should_lookahead(opt);
1016 while (left) {
1017 char *eol, ch;
1018 int hit;
1021 * look_ahead() skips quickly to the line that possibly
1022 * has the next hit; don't call it if we need to do
1023 * something more than just skipping the current line
1024 * in response to an unmatch for the current line. E.g.
1025 * inside a post-context window, we will show the current
1026 * line as a context around the previous hit when it
1027 * doesn't hit.
1029 if (try_lookahead
1030 && !(last_hit
1031 && lno <= last_hit + opt->post_context)
1032 && look_ahead(opt, &left, &lno, &bol))
1033 break;
1034 eol = end_of_line(bol, &left);
1035 ch = *eol;
1036 *eol = 0;
1038 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1039 ctx = GREP_CONTEXT_BODY;
1041 hit = match_line(opt, bol, eol, ctx, collect_hits);
1042 *eol = ch;
1044 if (collect_hits)
1045 goto next_line;
1047 /* "grep -v -e foo -e bla" should list lines
1048 * that do not have either, so inversion should
1049 * be done outside.
1051 if (opt->invert)
1052 hit = !hit;
1053 if (opt->unmatch_name_only) {
1054 if (hit)
1055 return 0;
1056 goto next_line;
1058 if (hit) {
1059 count++;
1060 if (opt->status_only)
1061 return 1;
1062 if (opt->name_only) {
1063 show_name(opt, name);
1064 return 1;
1066 if (opt->count)
1067 goto next_line;
1068 if (binary_match_only) {
1069 opt->output(opt, "Binary file ", 12);
1070 output_color(opt, name, strlen(name),
1071 opt->color_filename);
1072 opt->output(opt, " matches\n", 9);
1073 return 1;
1075 /* Hit at this line. If we haven't shown the
1076 * pre-context lines, we would need to show them.
1078 if (opt->pre_context)
1079 show_pre_context(opt, name, buf, bol, lno);
1080 else if (opt->funcname)
1081 show_funcname_line(opt, name, buf, bol, lno);
1082 show_line(opt, bol, eol, name, lno, ':');
1083 last_hit = lno;
1085 else if (last_hit &&
1086 lno <= last_hit + opt->post_context) {
1087 /* If the last hit is within the post context,
1088 * we need to show this line.
1090 show_line(opt, bol, eol, name, lno, '-');
1093 next_line:
1094 bol = eol + 1;
1095 if (!left)
1096 break;
1097 left--;
1098 lno++;
1101 if (collect_hits)
1102 return 0;
1104 if (opt->status_only)
1105 return 0;
1106 if (opt->unmatch_name_only) {
1107 /* We did not see any hit, so we want to show this */
1108 show_name(opt, name);
1109 return 1;
1112 xdiff_clear_find_func(&xecfg);
1113 opt->priv = NULL;
1115 /* NEEDSWORK:
1116 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1117 * which feels mostly useless but sometimes useful. Maybe
1118 * make it another option? For now suppress them.
1120 if (opt->count && count) {
1121 char buf[32];
1122 output_color(opt, name, strlen(name), opt->color_filename);
1123 output_sep(opt, ':');
1124 snprintf(buf, sizeof(buf), "%u\n", count);
1125 opt->output(opt, buf, strlen(buf));
1126 return 1;
1128 return !!last_hit;
1131 static void clr_hit_marker(struct grep_expr *x)
1133 /* All-hit markers are meaningful only at the very top level
1134 * OR node.
1136 while (1) {
1137 x->hit = 0;
1138 if (x->node != GREP_NODE_OR)
1139 return;
1140 x->u.binary.left->hit = 0;
1141 x = x->u.binary.right;
1145 static int chk_hit_marker(struct grep_expr *x)
1147 /* Top level nodes have hit markers. See if they all are hits */
1148 while (1) {
1149 if (x->node != GREP_NODE_OR)
1150 return x->hit;
1151 if (!x->u.binary.left->hit)
1152 return 0;
1153 x = x->u.binary.right;
1157 int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
1160 * we do not have to do the two-pass grep when we do not check
1161 * buffer-wide "all-match".
1163 if (!opt->all_match)
1164 return grep_buffer_1(opt, name, buf, size, 0);
1166 /* Otherwise the toplevel "or" terms hit a bit differently.
1167 * We first clear hit markers from them.
1169 clr_hit_marker(opt->pattern_expression);
1170 grep_buffer_1(opt, name, buf, size, 1);
1172 if (!chk_hit_marker(opt->pattern_expression))
1173 return 0;
1175 return grep_buffer_1(opt, name, buf, size, 0);