grep.h: make "grep_opt.pattern_type_option" use its enum
[alt-git.git] / grep.c
blob35e12e43c09fe794b250d1e2264eee684cbd36f1
1 #include "cache.h"
2 #include "config.h"
3 #include "grep.h"
4 #include "object-store.h"
5 #include "userdiff.h"
6 #include "xdiff-interface.h"
7 #include "diff.h"
8 #include "diffcore.h"
9 #include "commit.h"
10 #include "quote.h"
11 #include "help.h"
13 static int grep_source_load(struct grep_source *gs);
14 static int grep_source_is_binary(struct grep_source *gs,
15 struct index_state *istate);
/*
 * Output callback that writes the "size" bytes starting at "buf" to
 * stdout as a single item. The "opt" argument is not consulted.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	fwrite(buf, size, 1, stdout);
}
22 static const char *color_grep_slots[] = {
23 [GREP_COLOR_CONTEXT] = "context",
24 [GREP_COLOR_FILENAME] = "filename",
25 [GREP_COLOR_FUNCTION] = "function",
26 [GREP_COLOR_LINENO] = "lineNumber",
27 [GREP_COLOR_COLUMNNO] = "column",
28 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
29 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
30 [GREP_COLOR_SELECTED] = "selected",
31 [GREP_COLOR_SEP] = "separator",
34 static int parse_pattern_type_arg(const char *opt, const char *arg)
36 if (!strcmp(arg, "default"))
37 return GREP_PATTERN_TYPE_UNSPECIFIED;
38 else if (!strcmp(arg, "basic"))
39 return GREP_PATTERN_TYPE_BRE;
40 else if (!strcmp(arg, "extended"))
41 return GREP_PATTERN_TYPE_ERE;
42 else if (!strcmp(arg, "fixed"))
43 return GREP_PATTERN_TYPE_FIXED;
44 else if (!strcmp(arg, "perl"))
45 return GREP_PATTERN_TYPE_PCRE;
46 die("bad %s argument: %s", opt, arg);
49 define_list_config_array_extra(color_grep_slots, {"match"});
52 * Read the configuration file once and store it in
53 * the grep_defaults template.
55 int grep_config(const char *var, const char *value, void *cb)
57 struct grep_opt *opt = cb;
58 const char *slot;
60 if (userdiff_config(var, value) < 0)
61 return -1;
63 if (!strcmp(var, "grep.extendedregexp")) {
64 opt->extended_regexp_option = git_config_bool(var, value);
65 return 0;
68 if (!strcmp(var, "grep.patterntype")) {
69 opt->pattern_type_option = parse_pattern_type_arg(var, value);
70 return 0;
73 if (!strcmp(var, "grep.linenumber")) {
74 opt->linenum = git_config_bool(var, value);
75 return 0;
77 if (!strcmp(var, "grep.column")) {
78 opt->columnnum = git_config_bool(var, value);
79 return 0;
82 if (!strcmp(var, "grep.fullname")) {
83 opt->relative = !git_config_bool(var, value);
84 return 0;
87 if (!strcmp(var, "color.grep"))
88 opt->color = git_config_colorbool(var, value);
89 if (!strcmp(var, "color.grep.match")) {
90 if (grep_config("color.grep.matchcontext", value, cb) < 0)
91 return -1;
92 if (grep_config("color.grep.matchselected", value, cb) < 0)
93 return -1;
94 } else if (skip_prefix(var, "color.grep.", &slot)) {
95 int i = LOOKUP_CONFIG(color_grep_slots, slot);
96 char *color;
98 if (i < 0)
99 return -1;
100 color = opt->colors[i];
101 if (!value)
102 return config_error_nonbool(var);
103 return color_parse(value, color);
105 return 0;
108 void grep_init(struct grep_opt *opt, struct repository *repo)
110 struct grep_opt blank = GREP_OPT_INIT;
111 memcpy(opt, &blank, sizeof(*opt));
113 opt->repo = repo;
114 opt->pattern_tail = &opt->pattern_list;
115 opt->header_tail = &opt->header_list;
118 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
121 * When committing to the pattern type by setting the relevant
122 * fields in grep_opt it's generally not necessary to zero out
123 * the fields we're not choosing, since they won't have been
124 * set by anything. The extended_regexp_option field is the
125 * only exception to this.
127 * This is because in the process of parsing grep.patternType
128 * & grep.extendedRegexp we set opt->pattern_type_option and
129 * opt->extended_regexp_option, respectively. We then
130 * internally use opt->extended_regexp_option to see if we're
131 * compiling an ERE. It must be unset if that's not actually
132 * the case.
134 if (pattern_type != GREP_PATTERN_TYPE_ERE &&
135 opt->extended_regexp_option)
136 opt->extended_regexp_option = 0;
138 switch (pattern_type) {
139 case GREP_PATTERN_TYPE_UNSPECIFIED:
140 /* fall through */
142 case GREP_PATTERN_TYPE_BRE:
143 break;
145 case GREP_PATTERN_TYPE_ERE:
146 opt->extended_regexp_option = 1;
147 break;
149 case GREP_PATTERN_TYPE_FIXED:
150 opt->fixed = 1;
151 break;
153 case GREP_PATTERN_TYPE_PCRE:
154 opt->pcre2 = 1;
155 break;
159 void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
161 if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
162 grep_set_pattern_type_option(pattern_type, opt);
163 else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
164 grep_set_pattern_type_option(opt->pattern_type_option, opt);
165 else if (opt->extended_regexp_option)
167 * This branch *must* happen after setting from the
168 * opt->pattern_type_option above, we don't want
169 * grep.extendedRegexp to override grep.patternType!
171 grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
174 static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
175 const char *origin, int no,
176 enum grep_pat_token t,
177 enum grep_header_field field)
179 struct grep_pat *p = xcalloc(1, sizeof(*p));
180 p->pattern = xmemdupz(pat, patlen);
181 p->patternlen = patlen;
182 p->origin = origin;
183 p->no = no;
184 p->token = t;
185 p->field = field;
186 return p;
189 static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
191 **tail = p;
192 *tail = &p->next;
193 p->next = NULL;
195 switch (p->token) {
196 case GREP_PATTERN: /* atom */
197 case GREP_PATTERN_HEAD:
198 case GREP_PATTERN_BODY:
199 for (;;) {
200 struct grep_pat *new_pat;
201 size_t len = 0;
202 char *cp = p->pattern + p->patternlen, *nl = NULL;
203 while (++len <= p->patternlen) {
204 if (*(--cp) == '\n') {
205 nl = cp;
206 break;
209 if (!nl)
210 break;
211 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
212 p->no, p->token, p->field);
213 new_pat->next = p->next;
214 if (!p->next)
215 *tail = &new_pat->next;
216 p->next = new_pat;
217 *nl = '\0';
218 p->patternlen -= len;
220 break;
221 default:
222 break;
226 void append_header_grep_pattern(struct grep_opt *opt,
227 enum grep_header_field field, const char *pat)
229 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
230 GREP_PATTERN_HEAD, field);
231 if (field == GREP_HEADER_REFLOG)
232 opt->use_reflog_filter = 1;
233 do_append_grep_pat(&opt->header_tail, p);
236 void append_grep_pattern(struct grep_opt *opt, const char *pat,
237 const char *origin, int no, enum grep_pat_token t)
239 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
242 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
243 const char *origin, int no, enum grep_pat_token t)
245 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
246 do_append_grep_pat(&opt->pattern_tail, p);
249 struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
251 struct grep_pat *pat;
252 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
253 *ret = *opt;
255 ret->pattern_list = NULL;
256 ret->pattern_tail = &ret->pattern_list;
258 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
260 if(pat->token == GREP_PATTERN_HEAD)
261 append_header_grep_pattern(ret, pat->field,
262 pat->pattern);
263 else
264 append_grep_pat(ret, pat->pattern, pat->patternlen,
265 pat->origin, pat->no, pat->token);
268 return ret;
271 static NORETURN void compile_regexp_failed(const struct grep_pat *p,
272 const char *error)
274 char where[1024];
276 if (p->no)
277 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
278 else if (p->origin)
279 xsnprintf(where, sizeof(where), "%s, ", p->origin);
280 else
281 where[0] = 0;
283 die("%s'%s': %s", where, p->pattern, error);
/*
 * Return 1 if none of the "len" bytes at "s" is a regex-special
 * character according to is_regex_special(), 0 otherwise.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;

	while (s < end) {
		if (is_regex_special(*s))
			return 0;
		s++;
	}
	return 1;
}
298 #ifdef USE_LIBPCRE2
299 #define GREP_PCRE2_DEBUG_MALLOC 0
301 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
303 void *pointer = malloc(size);
304 #if GREP_PCRE2_DEBUG_MALLOC
305 static int count = 1;
306 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
307 #endif
308 return pointer;
311 static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
313 #if GREP_PCRE2_DEBUG_MALLOC
314 static int count = 1;
315 if (pointer)
316 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
317 #endif
318 free(pointer);
321 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
323 int error;
324 PCRE2_UCHAR errbuf[256];
325 PCRE2_SIZE erroffset;
326 int options = PCRE2_MULTILINE;
327 int jitret;
328 int patinforet;
329 size_t jitsizearg;
330 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
333 * Call pcre2_general_context_create() before calling any
334 * other pcre2_*(). It sets up our malloc()/free() functions
335 * with which everything else is allocated.
337 p->pcre2_general_context = pcre2_general_context_create(
338 pcre2_malloc, pcre2_free, NULL);
339 if (!p->pcre2_general_context)
340 die("Couldn't allocate PCRE2 general context");
342 if (opt->ignore_case) {
343 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
344 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
345 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
346 pcre2_set_character_tables(p->pcre2_compile_context,
347 p->pcre2_tables);
349 options |= PCRE2_CASELESS;
351 if (!opt->ignore_locale && is_utf8_locale() && !literal)
352 options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
354 #ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER
355 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
356 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
357 options |= PCRE2_NO_START_OPTIMIZE;
358 #endif
360 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
361 p->patternlen, options, &error, &erroffset,
362 p->pcre2_compile_context);
364 if (p->pcre2_pattern) {
365 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
366 if (!p->pcre2_match_data)
367 die("Couldn't allocate PCRE2 match data");
368 } else {
369 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
370 compile_regexp_failed(p, (const char *)&errbuf);
373 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
374 if (p->pcre2_jit_on) {
375 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
376 if (jitret)
377 die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
380 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
381 * tells us whether the library itself supports JIT,
382 * but to see whether we're going to be actually using
383 * JIT we need to extract PCRE2_INFO_JITSIZE from the
384 * pattern *after* we do pcre2_jit_compile() above.
386 * This is because if the pattern contains the
387 * (*NO_JIT) verb (see pcre2syntax(3))
388 * pcre2_jit_compile() will exit early with 0. If we
389 * then proceed to call pcre2_jit_match() further down
390 * the line instead of pcre2_match() we'll either
391 * segfault (pre PCRE 10.31) or run into a fatal error
392 * (post PCRE2 10.31)
394 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
395 if (patinforet)
396 BUG("pcre2_pattern_info() failed: %d", patinforet);
397 if (jitsizearg == 0) {
398 p->pcre2_jit_on = 0;
399 return;
404 static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
405 regmatch_t *match, int eflags)
407 int ret, flags = 0;
408 PCRE2_SIZE *ovector;
409 PCRE2_UCHAR errbuf[256];
411 if (eflags & REG_NOTBOL)
412 flags |= PCRE2_NOTBOL;
414 if (p->pcre2_jit_on)
415 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
416 eol - line, 0, flags, p->pcre2_match_data,
417 NULL);
418 else
419 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
420 eol - line, 0, flags, p->pcre2_match_data,
421 NULL);
423 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
424 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
425 die("%s failed with error code %d: %s",
426 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
427 errbuf);
429 if (ret > 0) {
430 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
431 ret = 0;
432 match->rm_so = (int)ovector[0];
433 match->rm_eo = (int)ovector[1];
436 return ret;
439 static void free_pcre2_pattern(struct grep_pat *p)
441 pcre2_compile_context_free(p->pcre2_compile_context);
442 pcre2_code_free(p->pcre2_pattern);
443 pcre2_match_data_free(p->pcre2_match_data);
444 #ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
445 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
446 #else
447 free((void *)p->pcre2_tables);
448 #endif
449 pcre2_general_context_free(p->pcre2_general_context);
451 #else /* !USE_LIBPCRE2 */
/* Build without PCRE2: asking for a Perl-compatible regex is fatal. */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
/* Build without PCRE2: always returns 1, i.e. never reports a match. */
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
		      regmatch_t *match, int eflags)
{
	return 1;
}
/* Build without PCRE2: there is nothing to release. */
static void free_pcre2_pattern(struct grep_pat *p)
{
}
467 static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
469 struct strbuf sb = STRBUF_INIT;
470 int err;
471 int regflags = 0;
473 basic_regex_quote_buf(&sb, p->pattern);
474 if (opt->ignore_case)
475 regflags |= REG_ICASE;
476 err = regcomp(&p->regexp, sb.buf, regflags);
477 strbuf_release(&sb);
478 if (err) {
479 char errbuf[1024];
480 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
481 compile_regexp_failed(p, errbuf);
484 #endif /* !USE_LIBPCRE2 */
486 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
488 int err;
489 int regflags = REG_NEWLINE;
491 p->word_regexp = opt->word_regexp;
492 p->ignore_case = opt->ignore_case;
493 p->fixed = opt->fixed;
495 if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
496 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
498 p->is_fixed = is_fixed(p->pattern, p->patternlen);
499 #ifdef USE_LIBPCRE2
500 if (!p->fixed && !p->is_fixed) {
501 const char *no_jit = "(*NO_JIT)";
502 const int no_jit_len = strlen(no_jit);
503 if (starts_with(p->pattern, no_jit) &&
504 is_fixed(p->pattern + no_jit_len,
505 p->patternlen - no_jit_len))
506 p->is_fixed = 1;
508 #endif
509 if (p->fixed || p->is_fixed) {
510 #ifdef USE_LIBPCRE2
511 if (p->is_fixed) {
512 compile_pcre2_pattern(p, opt);
513 } else {
515 * E.g. t7811-grep-open.sh relies on the
516 * pattern being restored.
518 char *old_pattern = p->pattern;
519 size_t old_patternlen = p->patternlen;
520 struct strbuf sb = STRBUF_INIT;
523 * There is the PCRE2_LITERAL flag, but it's
524 * only in PCRE v2 10.30 and later. Needing to
525 * ifdef our way around that and dealing with
526 * it + PCRE2_MULTILINE being an error is more
527 * complex than just quoting this ourselves.
529 strbuf_add(&sb, "\\Q", 2);
530 strbuf_add(&sb, p->pattern, p->patternlen);
531 strbuf_add(&sb, "\\E", 2);
533 p->pattern = sb.buf;
534 p->patternlen = sb.len;
535 compile_pcre2_pattern(p, opt);
536 p->pattern = old_pattern;
537 p->patternlen = old_patternlen;
538 strbuf_release(&sb);
540 #else /* !USE_LIBPCRE2 */
541 compile_fixed_regexp(p, opt);
542 #endif /* !USE_LIBPCRE2 */
543 return;
546 if (opt->pcre2) {
547 compile_pcre2_pattern(p, opt);
548 return;
551 if (p->ignore_case)
552 regflags |= REG_ICASE;
553 if (opt->extended_regexp_option)
554 regflags |= REG_EXTENDED;
555 err = regcomp(&p->regexp, p->pattern, regflags);
556 if (err) {
557 char errbuf[1024];
558 regerror(err, &p->regexp, errbuf, 1024);
559 compile_regexp_failed(p, errbuf);
563 static struct grep_expr *compile_pattern_or(struct grep_pat **);
564 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
566 struct grep_pat *p;
567 struct grep_expr *x;
569 p = *list;
570 if (!p)
571 return NULL;
572 switch (p->token) {
573 case GREP_PATTERN: /* atom */
574 case GREP_PATTERN_HEAD:
575 case GREP_PATTERN_BODY:
576 CALLOC_ARRAY(x, 1);
577 x->node = GREP_NODE_ATOM;
578 x->u.atom = p;
579 *list = p->next;
580 return x;
581 case GREP_OPEN_PAREN:
582 *list = p->next;
583 x = compile_pattern_or(list);
584 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
585 die("unmatched parenthesis");
586 *list = (*list)->next;
587 return x;
588 default:
589 return NULL;
593 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
595 struct grep_pat *p;
596 struct grep_expr *x;
598 p = *list;
599 if (!p)
600 return NULL;
601 switch (p->token) {
602 case GREP_NOT:
603 if (!p->next)
604 die("--not not followed by pattern expression");
605 *list = p->next;
606 CALLOC_ARRAY(x, 1);
607 x->node = GREP_NODE_NOT;
608 x->u.unary = compile_pattern_not(list);
609 if (!x->u.unary)
610 die("--not followed by non pattern expression");
611 return x;
612 default:
613 return compile_pattern_atom(list);
617 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
619 struct grep_pat *p;
620 struct grep_expr *x, *y, *z;
622 x = compile_pattern_not(list);
623 p = *list;
624 if (p && p->token == GREP_AND) {
625 if (!x)
626 die("--and not preceded by pattern expression");
627 if (!p->next)
628 die("--and not followed by pattern expression");
629 *list = p->next;
630 y = compile_pattern_and(list);
631 if (!y)
632 die("--and not followed by pattern expression");
633 CALLOC_ARRAY(z, 1);
634 z->node = GREP_NODE_AND;
635 z->u.binary.left = x;
636 z->u.binary.right = y;
637 return z;
639 return x;
642 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
644 struct grep_pat *p;
645 struct grep_expr *x, *y, *z;
647 x = compile_pattern_and(list);
648 p = *list;
649 if (x && p && p->token != GREP_CLOSE_PAREN) {
650 y = compile_pattern_or(list);
651 if (!y)
652 die("not a pattern expression %s", p->pattern);
653 CALLOC_ARRAY(z, 1);
654 z->node = GREP_NODE_OR;
655 z->u.binary.left = x;
656 z->u.binary.right = y;
657 return z;
659 return x;
/* Entry point of the pattern expression parser: an or-expression. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
667 static struct grep_expr *grep_not_expr(struct grep_expr *expr)
669 struct grep_expr *z = xcalloc(1, sizeof(*z));
670 z->node = GREP_NODE_NOT;
671 z->u.unary = expr;
672 return z;
675 static struct grep_expr *grep_true_expr(void)
677 struct grep_expr *z = xcalloc(1, sizeof(*z));
678 z->node = GREP_NODE_TRUE;
679 return z;
682 static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
684 struct grep_expr *z = xcalloc(1, sizeof(*z));
685 z->node = GREP_NODE_OR;
686 z->u.binary.left = left;
687 z->u.binary.right = right;
688 return z;
691 static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
693 struct grep_pat *p;
694 struct grep_expr *header_expr;
695 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
696 enum grep_header_field fld;
698 if (!opt->header_list)
699 return NULL;
701 for (p = opt->header_list; p; p = p->next) {
702 if (p->token != GREP_PATTERN_HEAD)
703 BUG("a non-header pattern in grep header list.");
704 if (p->field < GREP_HEADER_FIELD_MIN ||
705 GREP_HEADER_FIELD_MAX <= p->field)
706 BUG("unknown header field %d", p->field);
707 compile_regexp(p, opt);
710 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
711 header_group[fld] = NULL;
713 for (p = opt->header_list; p; p = p->next) {
714 struct grep_expr *h;
715 struct grep_pat *pp = p;
717 h = compile_pattern_atom(&pp);
718 if (!h || pp != p->next)
719 BUG("malformed header expr");
720 if (!header_group[p->field]) {
721 header_group[p->field] = h;
722 continue;
724 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
727 header_expr = NULL;
729 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
730 if (!header_group[fld])
731 continue;
732 if (!header_expr)
733 header_expr = grep_true_expr();
734 header_expr = grep_or_expr(header_group[fld], header_expr);
736 return header_expr;
739 static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
741 struct grep_expr *z = x;
743 while (x) {
744 assert(x->node == GREP_NODE_OR);
745 if (x->u.binary.right &&
746 x->u.binary.right->node == GREP_NODE_TRUE) {
747 x->u.binary.right = y;
748 break;
750 x = x->u.binary.right;
752 return z;
755 void compile_grep_patterns(struct grep_opt *opt)
757 struct grep_pat *p;
758 struct grep_expr *header_expr = prep_header_patterns(opt);
760 for (p = opt->pattern_list; p; p = p->next) {
761 switch (p->token) {
762 case GREP_PATTERN: /* atom */
763 case GREP_PATTERN_HEAD:
764 case GREP_PATTERN_BODY:
765 compile_regexp(p, opt);
766 break;
767 default:
768 opt->extended = 1;
769 break;
773 if (opt->all_match || opt->no_body_match || header_expr)
774 opt->extended = 1;
775 else if (!opt->extended)
776 return;
778 p = opt->pattern_list;
779 if (p)
780 opt->pattern_expression = compile_pattern_expr(&p);
781 if (p)
782 die("incomplete pattern expression: %s", p->pattern);
784 if (opt->no_body_match && opt->pattern_expression)
785 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
787 if (!header_expr)
788 return;
790 if (!opt->pattern_expression)
791 opt->pattern_expression = header_expr;
792 else if (opt->all_match)
793 opt->pattern_expression = grep_splice_or(header_expr,
794 opt->pattern_expression);
795 else
796 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
797 header_expr);
798 opt->all_match = 1;
801 static void free_pattern_expr(struct grep_expr *x)
803 switch (x->node) {
804 case GREP_NODE_TRUE:
805 case GREP_NODE_ATOM:
806 break;
807 case GREP_NODE_NOT:
808 free_pattern_expr(x->u.unary);
809 break;
810 case GREP_NODE_AND:
811 case GREP_NODE_OR:
812 free_pattern_expr(x->u.binary.left);
813 free_pattern_expr(x->u.binary.right);
814 break;
816 free(x);
819 void free_grep_patterns(struct grep_opt *opt)
821 struct grep_pat *p, *n;
823 for (p = opt->pattern_list; p; p = n) {
824 n = p->next;
825 switch (p->token) {
826 case GREP_PATTERN: /* atom */
827 case GREP_PATTERN_HEAD:
828 case GREP_PATTERN_BODY:
829 if (p->pcre2_pattern)
830 free_pcre2_pattern(p);
831 else
832 regfree(&p->regexp);
833 free(p->pattern);
834 break;
835 default:
836 break;
838 free(p);
841 if (!opt->extended)
842 return;
843 free_pattern_expr(opt->pattern_expression);
/*
 * Return a pointer to the first '\n' within the "*left" bytes starting
 * at "cp", or to the end of that range if there is none. "*left" is
 * reduced by the number of bytes skipped.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	const char *limit = cp + *left;

	while (cp < limit && *cp != '\n')
		cp++;
	*left = limit - cp;
	return cp;
}
/* A word character is an alphanumeric character or an underscore. */
static int word_char(char ch)
{
	if (ch == '_')
		return 1;
	return isalnum(ch) ? 1 : 0;
}
862 static void output_color(struct grep_opt *opt, const void *data, size_t size,
863 const char *color)
865 if (want_color(opt->color) && color && color[0]) {
866 opt->output(opt, color, strlen(color));
867 opt->output(opt, data, size);
868 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
869 } else
870 opt->output(opt, data, size);
873 static void output_sep(struct grep_opt *opt, char sign)
875 if (opt->null_following_name)
876 opt->output(opt, "\0", 1);
877 else
878 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
881 static void show_name(struct grep_opt *opt, const char *name)
883 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
884 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
887 static int patmatch(struct grep_pat *p,
888 const char *line, const char *eol,
889 regmatch_t *match, int eflags)
891 int hit;
893 if (p->pcre2_pattern)
894 hit = !pcre2match(p, line, eol, match, eflags);
895 else
896 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
897 eflags);
899 return hit;
/*
 * Scan backwards from "*eol_p" for the last '>' after "bol" and, if
 * one is found, move "*eol_p" to just past it. The character at "bol"
 * itself is never examined. "*eol_p" is unchanged when no '>' is
 * found.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cursor;

	for (cursor = *eol_p - 1; bol < cursor; cursor--) {
		if (*cursor == '>') {
			*eol_p = cursor + 1;
			return;
		}
	}
}
/*
 * Line prefixes that header patterns are matched against, together
 * with their lengths. match_one_pattern() indexes this table with the
 * pattern's header field value.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ", 7 },
	{ "committer ", 10 },
	{ "reflog ", 7 },
};
923 static int headerless_match_one_pattern(struct grep_pat *p,
924 const char *bol, const char *eol,
925 enum grep_context ctx,
926 regmatch_t *pmatch, int eflags)
928 int hit = 0;
929 const char *start = bol;
931 if ((p->token != GREP_PATTERN) &&
932 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
933 return 0;
935 again:
936 hit = patmatch(p, bol, eol, pmatch, eflags);
938 if (hit && p->word_regexp) {
939 if ((pmatch[0].rm_so < 0) ||
940 (eol - bol) < pmatch[0].rm_so ||
941 (pmatch[0].rm_eo < 0) ||
942 (eol - bol) < pmatch[0].rm_eo)
943 die("regexp returned nonsense");
945 /* Match beginning must be either beginning of the
946 * line, or at word boundary (i.e. the last char must
947 * not be a word char). Similarly, match end must be
948 * either end of the line, or at word boundary
949 * (i.e. the next char must not be a word char).
951 if ( ((pmatch[0].rm_so == 0) ||
952 !word_char(bol[pmatch[0].rm_so-1])) &&
953 ((pmatch[0].rm_eo == (eol-bol)) ||
954 !word_char(bol[pmatch[0].rm_eo])) )
956 else
957 hit = 0;
959 /* Words consist of at least one character. */
960 if (pmatch->rm_so == pmatch->rm_eo)
961 hit = 0;
963 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
964 /* There could be more than one match on the
965 * line, and the first match might not be
966 * strict word match. But later ones could be!
967 * Forward to the next possible start, i.e. the
968 * next position following a non-word char.
970 bol = pmatch[0].rm_so + bol + 1;
971 while (word_char(bol[-1]) && bol < eol)
972 bol++;
973 eflags |= REG_NOTBOL;
974 if (bol < eol)
975 goto again;
978 if (hit) {
979 pmatch[0].rm_so += bol - start;
980 pmatch[0].rm_eo += bol - start;
982 return hit;
985 static int match_one_pattern(struct grep_pat *p,
986 const char *bol, const char *eol,
987 enum grep_context ctx, regmatch_t *pmatch,
988 int eflags)
990 const char *field;
991 size_t len;
993 if (p->token == GREP_PATTERN_HEAD) {
994 assert(p->field < ARRAY_SIZE(header_field));
995 field = header_field[p->field].field;
996 len = header_field[p->field].len;
997 if (strncmp(bol, field, len))
998 return 0;
999 bol += len;
1001 switch (p->field) {
1002 case GREP_HEADER_AUTHOR:
1003 case GREP_HEADER_COMMITTER:
1004 strip_timestamp(bol, &eol);
1005 break;
1006 default:
1007 break;
1011 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1015 static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1016 const char *bol, const char *eol,
1017 enum grep_context ctx, ssize_t *col,
1018 ssize_t *icol, int collect_hits)
1020 int h = 0;
1022 if (!x)
1023 die("Not a valid grep expression");
1024 switch (x->node) {
1025 case GREP_NODE_TRUE:
1026 h = 1;
1027 break;
1028 case GREP_NODE_ATOM:
1030 regmatch_t tmp;
1031 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1032 &tmp, 0);
1033 if (h && (*col < 0 || tmp.rm_so < *col))
1034 *col = tmp.rm_so;
1036 if (x->u.atom->token == GREP_PATTERN_BODY)
1037 opt->body_hit |= h;
1038 break;
1039 case GREP_NODE_NOT:
1041 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1043 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1045 break;
1046 case GREP_NODE_AND:
1047 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1048 icol, 0);
1049 if (h || opt->columnnum) {
1051 * Don't short-circuit AND when given --column, since a
1052 * NOT earlier in the tree may turn this into an OR. In
1053 * this case, see the below comment.
1055 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1056 ctx, col, icol, 0);
1058 break;
1059 case GREP_NODE_OR:
1060 if (!(collect_hits || opt->columnnum)) {
1062 * Don't short-circuit OR when given --column (or
1063 * collecting hits) to ensure we don't skip a later
1064 * child that would produce an earlier match.
1066 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1067 ctx, col, icol, 0) ||
1068 match_expr_eval(opt, x->u.binary.right, bol,
1069 eol, ctx, col, icol, 0));
1071 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1072 icol, 0);
1073 if (collect_hits)
1074 x->u.binary.left->hit |= h;
1075 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
1076 icol, collect_hits);
1077 break;
1078 default:
1079 die("Unexpected node type (internal error) %d", x->node);
1081 if (collect_hits)
1082 x->hit |= h;
1083 return h;
1086 static int match_expr(struct grep_opt *opt,
1087 const char *bol, const char *eol,
1088 enum grep_context ctx, ssize_t *col,
1089 ssize_t *icol, int collect_hits)
1091 struct grep_expr *x = opt->pattern_expression;
1092 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1095 static int match_line(struct grep_opt *opt,
1096 const char *bol, const char *eol,
1097 ssize_t *col, ssize_t *icol,
1098 enum grep_context ctx, int collect_hits)
1100 struct grep_pat *p;
1101 int hit = 0;
1103 if (opt->extended)
1104 return match_expr(opt, bol, eol, ctx, col, icol,
1105 collect_hits);
1107 /* we do not call with collect_hits without being extended */
1108 for (p = opt->pattern_list; p; p = p->next) {
1109 regmatch_t tmp;
1110 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1111 hit |= 1;
1112 if (!opt->columnnum) {
1114 * Without --column, any single match on a line
1115 * is enough to know that it needs to be
1116 * printed. With --column, scan _all_ patterns
1117 * to find the earliest.
1119 break;
1121 if (*col < 0 || tmp.rm_so < *col)
1122 *col = tmp.rm_so;
1125 return hit;
1128 static int match_next_pattern(struct grep_pat *p,
1129 const char *bol, const char *eol,
1130 enum grep_context ctx,
1131 regmatch_t *pmatch, int eflags)
1133 regmatch_t match;
1135 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
1136 return 0;
1137 if (match.rm_so < 0 || match.rm_eo < 0)
1138 return 0;
1139 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1140 if (match.rm_so > pmatch->rm_so)
1141 return 1;
1142 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1143 return 1;
1145 pmatch->rm_so = match.rm_so;
1146 pmatch->rm_eo = match.rm_eo;
1147 return 1;
1150 int grep_next_match(struct grep_opt *opt,
1151 const char *bol, const char *eol,
1152 enum grep_context ctx, regmatch_t *pmatch,
1153 enum grep_header_field field, int eflags)
1155 struct grep_pat *p;
1156 int hit = 0;
1158 pmatch->rm_so = pmatch->rm_eo = -1;
1159 if (bol < eol) {
1160 for (p = ((ctx == GREP_CONTEXT_HEAD)
1161 ? opt->header_list : opt->pattern_list);
1162 p; p = p->next) {
1163 switch (p->token) {
1164 case GREP_PATTERN_HEAD:
1165 if ((field != GREP_HEADER_FIELD_MAX) &&
1166 (p->field != field))
1167 continue;
1168 /* fall thru */
1169 case GREP_PATTERN: /* atom */
1170 case GREP_PATTERN_BODY:
1171 hit |= match_next_pattern(p, bol, eol, ctx,
1172 pmatch, eflags);
1173 break;
1174 default:
1175 break;
1179 return hit;
1182 static void show_line_header(struct grep_opt *opt, const char *name,
1183 unsigned lno, ssize_t cno, char sign)
1185 if (opt->heading && opt->last_shown == 0) {
1186 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1187 opt->output(opt, "\n", 1);
1189 opt->last_shown = lno;
1191 if (!opt->heading && opt->pathname) {
1192 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1193 output_sep(opt, sign);
1195 if (opt->linenum) {
1196 char buf[32];
1197 xsnprintf(buf, sizeof(buf), "%d", lno);
1198 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
1199 output_sep(opt, sign);
1202 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1203 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1204 * being called with a context line.
1206 if (opt->columnnum && cno) {
1207 char buf[32];
1208 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1209 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
1210 output_sep(opt, sign);
1214 static void show_line(struct grep_opt *opt,
1215 const char *bol, const char *eol,
1216 const char *name, unsigned lno, ssize_t cno, char sign)
1218 int rest = eol - bol;
1219 const char *match_color = NULL;
1220 const char *line_color = NULL;
1222 if (opt->file_break && opt->last_shown == 0) {
1223 if (opt->show_hunk_mark)
1224 opt->output(opt, "\n", 1);
1225 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1226 if (opt->last_shown == 0) {
1227 if (opt->show_hunk_mark) {
1228 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1229 opt->output(opt, "\n", 1);
1231 } else if (lno > opt->last_shown + 1) {
1232 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1233 opt->output(opt, "\n", 1);
1236 if (!opt->only_matching) {
1238 * In case the line we're being called with contains more than
1239 * one match, leave printing each header to the loop below.
1241 show_line_header(opt, name, lno, cno, sign);
1243 if (opt->color || opt->only_matching) {
1244 regmatch_t match;
1245 enum grep_context ctx = GREP_CONTEXT_BODY;
1246 int eflags = 0;
1248 if (opt->color) {
1249 if (sign == ':')
1250 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
1251 else
1252 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
1253 if (sign == ':')
1254 line_color = opt->colors[GREP_COLOR_SELECTED];
1255 else if (sign == '-')
1256 line_color = opt->colors[GREP_COLOR_CONTEXT];
1257 else if (sign == '=')
1258 line_color = opt->colors[GREP_COLOR_FUNCTION];
1260 while (grep_next_match(opt, bol, eol, ctx, &match,
1261 GREP_HEADER_FIELD_MAX, eflags)) {
1262 if (match.rm_so == match.rm_eo)
1263 break;
1265 if (opt->only_matching)
1266 show_line_header(opt, name, lno, cno, sign);
1267 else
1268 output_color(opt, bol, match.rm_so, line_color);
1269 output_color(opt, bol + match.rm_so,
1270 match.rm_eo - match.rm_so, match_color);
1271 if (opt->only_matching)
1272 opt->output(opt, "\n", 1);
1273 bol += match.rm_eo;
1274 cno += match.rm_eo;
1275 rest -= match.rm_eo;
1276 eflags = REG_NOTBOL;
1279 if (!opt->only_matching) {
1280 output_color(opt, bol, rest, line_color);
1281 opt->output(opt, "\n", 1);
/*
 * When non-zero, grep_attr_lock()/grep_attr_unlock() actually take and
 * release grep_attr_mutex; when zero they do nothing.
 */
int grep_use_locks;

/*
 * Serializes access to the gitattributes machinery, which is not
 * thread-safe.
 */
pthread_mutex_t grep_attr_mutex;
1293 static inline void grep_attr_lock(void)
1295 if (grep_use_locks)
1296 pthread_mutex_lock(&grep_attr_mutex);
1299 static inline void grep_attr_unlock(void)
1301 if (grep_use_locks)
1302 pthread_mutex_unlock(&grep_attr_mutex);
1305 static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1306 const char *bol, const char *eol)
1308 xdemitconf_t *xecfg = opt->priv;
1309 if (xecfg && !xecfg->find_func) {
1310 grep_source_load_driver(gs, opt->repo->index);
1311 if (gs->driver->funcname.pattern) {
1312 const struct userdiff_funcname *pe = &gs->driver->funcname;
1313 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1314 } else {
1315 xecfg = opt->priv = NULL;
1319 if (xecfg) {
1320 char buf[1];
1321 return xecfg->find_func(bol, eol - bol, buf, 1,
1322 xecfg->find_func_priv) >= 0;
1325 if (bol == eol)
1326 return 0;
1327 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1328 return 1;
1329 return 0;
1332 static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1333 const char *bol, unsigned lno)
1335 while (bol > gs->buf) {
1336 const char *eol = --bol;
1338 while (bol > gs->buf && bol[-1] != '\n')
1339 bol--;
1340 lno--;
1342 if (lno <= opt->last_shown)
1343 break;
1345 if (match_funcname(opt, gs, bol, eol)) {
1346 show_line(opt, bol, eol, gs->name, lno, 0, '=');
1347 break;
1352 static int is_empty_line(const char *bol, const char *eol);
1354 static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1355 const char *bol, const char *end, unsigned lno)
1357 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
1358 int funcname_needed = !!opt->funcname, comment_needed = 0;
1360 if (opt->pre_context < lno)
1361 from = lno - opt->pre_context;
1362 if (from <= opt->last_shown)
1363 from = opt->last_shown + 1;
1364 orig_from = from;
1365 if (opt->funcbody) {
1366 if (match_funcname(opt, gs, bol, end))
1367 comment_needed = 1;
1368 else
1369 funcname_needed = 1;
1370 from = opt->last_shown + 1;
1373 /* Rewind. */
1374 while (bol > gs->buf && cur > from) {
1375 const char *next_bol = bol;
1376 const char *eol = --bol;
1378 while (bol > gs->buf && bol[-1] != '\n')
1379 bol--;
1380 cur--;
1381 if (comment_needed && (is_empty_line(bol, eol) ||
1382 match_funcname(opt, gs, bol, eol))) {
1383 comment_needed = 0;
1384 from = orig_from;
1385 if (cur < from) {
1386 cur++;
1387 bol = next_bol;
1388 break;
1391 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1392 funcname_lno = cur;
1393 funcname_needed = 0;
1394 if (opt->funcbody)
1395 comment_needed = 1;
1396 else
1397 from = orig_from;
1401 /* We need to look even further back to find a function signature. */
1402 if (opt->funcname && funcname_needed)
1403 show_funcname_line(opt, gs, bol, cur);
1405 /* Back forward. */
1406 while (cur < lno) {
1407 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1409 while (*eol != '\n')
1410 eol++;
1411 show_line(opt, bol, eol, gs->name, cur, 0, sign);
1412 bol = eol + 1;
1413 cur++;
1417 static int should_lookahead(struct grep_opt *opt)
1419 struct grep_pat *p;
1421 if (opt->extended)
1422 return 0; /* punt for too complex stuff */
1423 if (opt->invert)
1424 return 0;
1425 for (p = opt->pattern_list; p; p = p->next) {
1426 if (p->token != GREP_PATTERN)
1427 return 0; /* punt for "header only" and stuff */
1429 return 1;
1432 static int look_ahead(struct grep_opt *opt,
1433 unsigned long *left_p,
1434 unsigned *lno_p,
1435 const char **bol_p)
1437 unsigned lno = *lno_p;
1438 const char *bol = *bol_p;
1439 struct grep_pat *p;
1440 const char *sp, *last_bol;
1441 regoff_t earliest = -1;
1443 for (p = opt->pattern_list; p; p = p->next) {
1444 int hit;
1445 regmatch_t m;
1447 hit = patmatch(p, bol, bol + *left_p, &m, 0);
1448 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1449 continue;
1450 if (earliest < 0 || m.rm_so < earliest)
1451 earliest = m.rm_so;
1454 if (earliest < 0) {
1455 *bol_p = bol + *left_p;
1456 *left_p = 0;
1457 return 1;
1459 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1460 ; /* find the beginning of the line */
1461 last_bol = sp;
1463 for (sp = bol; sp < last_bol; sp++) {
1464 if (*sp == '\n')
1465 lno++;
1467 *left_p -= last_bol - bol;
1468 *bol_p = last_bol;
1469 *lno_p = lno;
1470 return 0;
1473 static int fill_textconv_grep(struct repository *r,
1474 struct userdiff_driver *driver,
1475 struct grep_source *gs)
1477 struct diff_filespec *df;
1478 char *buf;
1479 size_t size;
1481 if (!driver || !driver->textconv)
1482 return grep_source_load(gs);
1485 * The textconv interface is intimately tied to diff_filespecs, so we
1486 * have to pretend to be one. If we could unify the grep_source
1487 * and diff_filespec structs, this mess could just go away.
1489 df = alloc_filespec(gs->path);
1490 switch (gs->type) {
1491 case GREP_SOURCE_OID:
1492 fill_filespec(df, gs->identifier, 1, 0100644);
1493 break;
1494 case GREP_SOURCE_FILE:
1495 fill_filespec(df, null_oid(), 0, 0100644);
1496 break;
1497 default:
1498 BUG("attempt to textconv something without a path?");
1502 * fill_textconv is not remotely thread-safe; it modifies the global
1503 * diff tempfile structure, writes to the_repo's odb and might
1504 * internally call thread-unsafe functions such as the
1505 * prepare_packed_git() lazy-initializator. Because of the last two, we
1506 * must ensure mutual exclusion between this call and the object reading
1507 * API, thus we use obj_read_lock() here.
1509 * TODO: allowing text conversion to run in parallel with object
1510 * reading operations might increase performance in the multithreaded
1511 * non-worktreee git-grep with --textconv.
1513 obj_read_lock();
1514 size = fill_textconv(r, driver, df, &buf);
1515 obj_read_unlock();
1516 free_filespec(df);
1519 * The normal fill_textconv usage by the diff machinery would just keep
1520 * the textconv'd buf separate from the diff_filespec. But much of the
1521 * grep code passes around a grep_source and assumes that its "buf"
1522 * pointer is the beginning of the thing we are searching. So let's
1523 * install our textconv'd version into the grep_source, taking care not
1524 * to leak any existing buffer.
1526 grep_source_clear_data(gs);
1527 gs->buf = buf;
1528 gs->size = size;
1530 return 0;
/*
 * Return 1 if [bol, eol) consists solely of whitespace (which includes
 * the case of a zero-length range), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	while (p < eol) {
		if (!isspace(*p))
			break;
		p++;
	}
	return p == eol;
}
1540 static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1542 const char *bol;
1543 const char *peek_bol = NULL;
1544 unsigned long left;
1545 unsigned lno = 1;
1546 unsigned last_hit = 0;
1547 int binary_match_only = 0;
1548 unsigned count = 0;
1549 int try_lookahead = 0;
1550 int show_function = 0;
1551 struct userdiff_driver *textconv = NULL;
1552 enum grep_context ctx = GREP_CONTEXT_HEAD;
1553 xdemitconf_t xecfg;
1555 if (!opt->status_only && gs->name == NULL)
1556 BUG("grep call which could print a name requires "
1557 "grep_source.name be non-NULL");
1559 if (!opt->output)
1560 opt->output = std_output;
1562 if (opt->pre_context || opt->post_context || opt->file_break ||
1563 opt->funcbody) {
1564 /* Show hunk marks, except for the first file. */
1565 if (opt->last_shown)
1566 opt->show_hunk_mark = 1;
1568 * If we're using threads then we can't easily identify
1569 * the first file. Always put hunk marks in that case
1570 * and skip the very first one later in work_done().
1572 if (opt->output != std_output)
1573 opt->show_hunk_mark = 1;
1575 opt->last_shown = 0;
1577 if (opt->allow_textconv) {
1578 grep_source_load_driver(gs, opt->repo->index);
1580 * We might set up the shared textconv cache data here, which
1581 * is not thread-safe. Also, get_oid_with_context() and
1582 * parse_object() might be internally called. As they are not
1583 * currently thread-safe and might be racy with object reading,
1584 * obj_read_lock() must be called.
1586 grep_attr_lock();
1587 obj_read_lock();
1588 textconv = userdiff_get_textconv(opt->repo, gs->driver);
1589 obj_read_unlock();
1590 grep_attr_unlock();
1594 * We know the result of a textconv is text, so we only have to care
1595 * about binary handling if we are not using it.
1597 if (!textconv) {
1598 switch (opt->binary) {
1599 case GREP_BINARY_DEFAULT:
1600 if (grep_source_is_binary(gs, opt->repo->index))
1601 binary_match_only = 1;
1602 break;
1603 case GREP_BINARY_NOMATCH:
1604 if (grep_source_is_binary(gs, opt->repo->index))
1605 return 0; /* Assume unmatch */
1606 break;
1607 case GREP_BINARY_TEXT:
1608 break;
1609 default:
1610 BUG("unknown binary handling mode");
1614 memset(&xecfg, 0, sizeof(xecfg));
1615 opt->priv = &xecfg;
1617 try_lookahead = should_lookahead(opt);
1619 if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
1620 return 0;
1622 bol = gs->buf;
1623 left = gs->size;
1624 while (left) {
1625 const char *eol;
1626 int hit;
1627 ssize_t cno;
1628 ssize_t col = -1, icol = -1;
1631 * look_ahead() skips quickly to the line that possibly
1632 * has the next hit; don't call it if we need to do
1633 * something more than just skipping the current line
1634 * in response to an unmatch for the current line. E.g.
1635 * inside a post-context window, we will show the current
1636 * line as a context around the previous hit when it
1637 * doesn't hit.
1639 if (try_lookahead
1640 && !(last_hit
1641 && (show_function ||
1642 lno <= last_hit + opt->post_context))
1643 && look_ahead(opt, &left, &lno, &bol))
1644 break;
1645 eol = end_of_line(bol, &left);
1647 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1648 ctx = GREP_CONTEXT_BODY;
1650 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1652 if (collect_hits)
1653 goto next_line;
1655 /* "grep -v -e foo -e bla" should list lines
1656 * that do not have either, so inversion should
1657 * be done outside.
1659 if (opt->invert)
1660 hit = !hit;
1661 if (opt->unmatch_name_only) {
1662 if (hit)
1663 return 0;
1664 goto next_line;
1666 if (hit) {
1667 count++;
1668 if (opt->status_only)
1669 return 1;
1670 if (opt->name_only) {
1671 show_name(opt, gs->name);
1672 return 1;
1674 if (opt->count)
1675 goto next_line;
1676 if (binary_match_only) {
1677 opt->output(opt, "Binary file ", 12);
1678 output_color(opt, gs->name, strlen(gs->name),
1679 opt->colors[GREP_COLOR_FILENAME]);
1680 opt->output(opt, " matches\n", 9);
1681 return 1;
1683 /* Hit at this line. If we haven't shown the
1684 * pre-context lines, we would need to show them.
1686 if (opt->pre_context || opt->funcbody)
1687 show_pre_context(opt, gs, bol, eol, lno);
1688 else if (opt->funcname)
1689 show_funcname_line(opt, gs, bol, lno);
1690 cno = opt->invert ? icol : col;
1691 if (cno < 0) {
1693 * A negative cno indicates that there was no
1694 * match on the line. We are thus inverted and
1695 * being asked to show all lines that _don't_
1696 * match a given expression. Therefore, set cno
1697 * to 0 to suggest the whole line matches.
1699 cno = 0;
1701 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1702 last_hit = lno;
1703 if (opt->funcbody)
1704 show_function = 1;
1705 goto next_line;
1707 if (show_function && (!peek_bol || peek_bol < bol)) {
1708 unsigned long peek_left = left;
1709 const char *peek_eol = eol;
1712 * Trailing empty lines are not interesting.
1713 * Peek past them to see if they belong to the
1714 * body of the current function.
1716 peek_bol = bol;
1717 while (is_empty_line(peek_bol, peek_eol)) {
1718 peek_bol = peek_eol + 1;
1719 peek_eol = end_of_line(peek_bol, &peek_left);
1722 if (match_funcname(opt, gs, peek_bol, peek_eol))
1723 show_function = 0;
1725 if (show_function ||
1726 (last_hit && lno <= last_hit + opt->post_context)) {
1727 /* If the last hit is within the post context,
1728 * we need to show this line.
1730 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1733 next_line:
1734 bol = eol + 1;
1735 if (!left)
1736 break;
1737 left--;
1738 lno++;
1741 if (collect_hits)
1742 return 0;
1744 if (opt->status_only)
1745 return opt->unmatch_name_only;
1746 if (opt->unmatch_name_only) {
1747 /* We did not see any hit, so we want to show this */
1748 show_name(opt, gs->name);
1749 return 1;
1752 xdiff_clear_find_func(&xecfg);
1753 opt->priv = NULL;
1755 /* NEEDSWORK:
1756 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1757 * which feels mostly useless but sometimes useful. Maybe
1758 * make it another option? For now suppress them.
1760 if (opt->count && count) {
1761 char buf[32];
1762 if (opt->pathname) {
1763 output_color(opt, gs->name, strlen(gs->name),
1764 opt->colors[GREP_COLOR_FILENAME]);
1765 output_sep(opt, ':');
1767 xsnprintf(buf, sizeof(buf), "%u\n", count);
1768 opt->output(opt, buf, strlen(buf));
1769 return 1;
1771 return !!last_hit;
1774 static void clr_hit_marker(struct grep_expr *x)
1776 /* All-hit markers are meaningful only at the very top level
1777 * OR node.
1779 while (1) {
1780 x->hit = 0;
1781 if (x->node != GREP_NODE_OR)
1782 return;
1783 x->u.binary.left->hit = 0;
1784 x = x->u.binary.right;
1788 static int chk_hit_marker(struct grep_expr *x)
1790 /* Top level nodes have hit markers. See if they all are hits */
1791 while (1) {
1792 if (x->node != GREP_NODE_OR)
1793 return x->hit;
1794 if (!x->u.binary.left->hit)
1795 return 0;
1796 x = x->u.binary.right;
1800 int grep_source(struct grep_opt *opt, struct grep_source *gs)
1803 * we do not have to do the two-pass grep when we do not check
1804 * buffer-wide "all-match".
1806 if (!opt->all_match && !opt->no_body_match)
1807 return grep_source_1(opt, gs, 0);
1809 /* Otherwise the toplevel "or" terms hit a bit differently.
1810 * We first clear hit markers from them.
1812 clr_hit_marker(opt->pattern_expression);
1813 opt->body_hit = 0;
1814 grep_source_1(opt, gs, 1);
1816 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1817 return 0;
1818 if (opt->no_body_match && opt->body_hit)
1819 return 0;
1821 return grep_source_1(opt, gs, 0);
1824 static void grep_source_init_buf(struct grep_source *gs,
1825 const char *buf,
1826 unsigned long size)
1828 gs->type = GREP_SOURCE_BUF;
1829 gs->name = NULL;
1830 gs->path = NULL;
1831 gs->buf = buf;
1832 gs->size = size;
1833 gs->driver = NULL;
1834 gs->identifier = NULL;
1837 int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
1839 struct grep_source gs;
1840 int r;
1842 grep_source_init_buf(&gs, buf, size);
1844 r = grep_source(opt, &gs);
1846 grep_source_clear(&gs);
1847 return r;
1850 void grep_source_init_file(struct grep_source *gs, const char *name,
1851 const char *path)
1853 gs->type = GREP_SOURCE_FILE;
1854 gs->name = xstrdup_or_null(name);
1855 gs->path = xstrdup_or_null(path);
1856 gs->buf = NULL;
1857 gs->size = 0;
1858 gs->driver = NULL;
1859 gs->identifier = xstrdup(path);
1862 void grep_source_init_oid(struct grep_source *gs, const char *name,
1863 const char *path, const struct object_id *oid,
1864 struct repository *repo)
1866 gs->type = GREP_SOURCE_OID;
1867 gs->name = xstrdup_or_null(name);
1868 gs->path = xstrdup_or_null(path);
1869 gs->buf = NULL;
1870 gs->size = 0;
1871 gs->driver = NULL;
1872 gs->identifier = oiddup(oid);
1873 gs->repo = repo;
1876 void grep_source_clear(struct grep_source *gs)
1878 FREE_AND_NULL(gs->name);
1879 FREE_AND_NULL(gs->path);
1880 FREE_AND_NULL(gs->identifier);
1881 grep_source_clear_data(gs);
1884 void grep_source_clear_data(struct grep_source *gs)
1886 switch (gs->type) {
1887 case GREP_SOURCE_FILE:
1888 case GREP_SOURCE_OID:
1889 /* these types own the buffer */
1890 free((char *)gs->buf);
1891 gs->buf = NULL;
1892 gs->size = 0;
1893 break;
1894 case GREP_SOURCE_BUF:
1895 /* leave user-provided buf intact */
1896 break;
1900 static int grep_source_load_oid(struct grep_source *gs)
1902 enum object_type type;
1904 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1905 &gs->size);
1906 if (!gs->buf)
1907 return error(_("'%s': unable to read %s"),
1908 gs->name,
1909 oid_to_hex(gs->identifier));
1910 return 0;
1913 static int grep_source_load_file(struct grep_source *gs)
1915 const char *filename = gs->identifier;
1916 struct stat st;
1917 char *data;
1918 size_t size;
1919 int i;
1921 if (lstat(filename, &st) < 0) {
1922 err_ret:
1923 if (errno != ENOENT)
1924 error_errno(_("failed to stat '%s'"), filename);
1925 return -1;
1927 if (!S_ISREG(st.st_mode))
1928 return -1;
1929 size = xsize_t(st.st_size);
1930 i = open(filename, O_RDONLY);
1931 if (i < 0)
1932 goto err_ret;
1933 data = xmallocz(size);
1934 if (st.st_size != read_in_full(i, data, size)) {
1935 error_errno(_("'%s': short read"), filename);
1936 close(i);
1937 free(data);
1938 return -1;
1940 close(i);
1942 gs->buf = data;
1943 gs->size = size;
1944 return 0;
1947 static int grep_source_load(struct grep_source *gs)
1949 if (gs->buf)
1950 return 0;
1952 switch (gs->type) {
1953 case GREP_SOURCE_FILE:
1954 return grep_source_load_file(gs);
1955 case GREP_SOURCE_OID:
1956 return grep_source_load_oid(gs);
1957 case GREP_SOURCE_BUF:
1958 return gs->buf ? 0 : -1;
1960 BUG("invalid grep_source type to load");
1963 void grep_source_load_driver(struct grep_source *gs,
1964 struct index_state *istate)
1966 if (gs->driver)
1967 return;
1969 grep_attr_lock();
1970 if (gs->path)
1971 gs->driver = userdiff_find_by_path(istate, gs->path);
1972 if (!gs->driver)
1973 gs->driver = userdiff_find_by_name("default");
1974 grep_attr_unlock();
1977 static int grep_source_is_binary(struct grep_source *gs,
1978 struct index_state *istate)
1980 grep_source_load_driver(gs, istate);
1981 if (gs->driver->binary != -1)
1982 return gs->driver->binary;
1984 if (!grep_source_load(gs))
1985 return buffer_is_binary(gs->buf, gs->size);
1987 return 0;