treewide: remove double forward declaration of read_in_full
[alt-git.git] / grep.c
blobd144b374894817c45cd0773bb1d42b884b3e9d9d
1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "gettext.h"
4 #include "grep.h"
5 #include "hex.h"
6 #include "object-store.h"
7 #include "userdiff.h"
8 #include "xdiff-interface.h"
9 #include "diff.h"
10 #include "diffcore.h"
11 #include "commit.h"
12 #include "quote.h"
13 #include "help.h"
14 #include "wrapper.h"
/* Prototypes for static grep_source helpers that are used before they are defined. */
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs,
				 struct index_state *istate);
/*
 * Output routine that writes the `size` bytes starting at `buf` to
 * stdout as a single item.  `opt` is not consulted.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	fwrite(buf, size, 1, stdout);
}
25 static const char *color_grep_slots[] = {
26 [GREP_COLOR_CONTEXT] = "context",
27 [GREP_COLOR_FILENAME] = "filename",
28 [GREP_COLOR_FUNCTION] = "function",
29 [GREP_COLOR_LINENO] = "lineNumber",
30 [GREP_COLOR_COLUMNNO] = "column",
31 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
32 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
33 [GREP_COLOR_SELECTED] = "selected",
34 [GREP_COLOR_SEP] = "separator",
37 static int parse_pattern_type_arg(const char *opt, const char *arg)
39 if (!strcmp(arg, "default"))
40 return GREP_PATTERN_TYPE_UNSPECIFIED;
41 else if (!strcmp(arg, "basic"))
42 return GREP_PATTERN_TYPE_BRE;
43 else if (!strcmp(arg, "extended"))
44 return GREP_PATTERN_TYPE_ERE;
45 else if (!strcmp(arg, "fixed"))
46 return GREP_PATTERN_TYPE_FIXED;
47 else if (!strcmp(arg, "perl"))
48 return GREP_PATTERN_TYPE_PCRE;
49 die("bad %s argument: %s", opt, arg);
52 define_list_config_array_extra(color_grep_slots, {"match"});
55 * Read the configuration file once and store it in
56 * the grep_defaults template.
58 int grep_config(const char *var, const char *value, void *cb)
60 struct grep_opt *opt = cb;
61 const char *slot;
63 if (userdiff_config(var, value) < 0)
64 return -1;
66 if (!strcmp(var, "grep.extendedregexp")) {
67 opt->extended_regexp_option = git_config_bool(var, value);
68 return 0;
71 if (!strcmp(var, "grep.patterntype")) {
72 opt->pattern_type_option = parse_pattern_type_arg(var, value);
73 return 0;
76 if (!strcmp(var, "grep.linenumber")) {
77 opt->linenum = git_config_bool(var, value);
78 return 0;
80 if (!strcmp(var, "grep.column")) {
81 opt->columnnum = git_config_bool(var, value);
82 return 0;
85 if (!strcmp(var, "grep.fullname")) {
86 opt->relative = !git_config_bool(var, value);
87 return 0;
90 if (!strcmp(var, "color.grep"))
91 opt->color = git_config_colorbool(var, value);
92 if (!strcmp(var, "color.grep.match")) {
93 if (grep_config("color.grep.matchcontext", value, cb) < 0)
94 return -1;
95 if (grep_config("color.grep.matchselected", value, cb) < 0)
96 return -1;
97 } else if (skip_prefix(var, "color.grep.", &slot)) {
98 int i = LOOKUP_CONFIG(color_grep_slots, slot);
99 char *color;
101 if (i < 0)
102 return -1;
103 color = opt->colors[i];
104 if (!value)
105 return config_error_nonbool(var);
106 return color_parse(value, color);
108 return 0;
111 void grep_init(struct grep_opt *opt, struct repository *repo)
113 struct grep_opt blank = GREP_OPT_INIT;
114 memcpy(opt, &blank, sizeof(*opt));
116 opt->repo = repo;
117 opt->pattern_tail = &opt->pattern_list;
118 opt->header_tail = &opt->header_list;
121 static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
122 const char *origin, int no,
123 enum grep_pat_token t,
124 enum grep_header_field field)
126 struct grep_pat *p = xcalloc(1, sizeof(*p));
127 p->pattern = xmemdupz(pat, patlen);
128 p->patternlen = patlen;
129 p->origin = origin;
130 p->no = no;
131 p->token = t;
132 p->field = field;
133 return p;
136 static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
138 **tail = p;
139 *tail = &p->next;
140 p->next = NULL;
142 switch (p->token) {
143 case GREP_PATTERN: /* atom */
144 case GREP_PATTERN_HEAD:
145 case GREP_PATTERN_BODY:
146 for (;;) {
147 struct grep_pat *new_pat;
148 size_t len = 0;
149 char *cp = p->pattern + p->patternlen, *nl = NULL;
150 while (++len <= p->patternlen) {
151 if (*(--cp) == '\n') {
152 nl = cp;
153 break;
156 if (!nl)
157 break;
158 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
159 p->no, p->token, p->field);
160 new_pat->next = p->next;
161 if (!p->next)
162 *tail = &new_pat->next;
163 p->next = new_pat;
164 *nl = '\0';
165 p->patternlen -= len;
167 break;
168 default:
169 break;
173 void append_header_grep_pattern(struct grep_opt *opt,
174 enum grep_header_field field, const char *pat)
176 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
177 GREP_PATTERN_HEAD, field);
178 if (field == GREP_HEADER_REFLOG)
179 opt->use_reflog_filter = 1;
180 do_append_grep_pat(&opt->header_tail, p);
183 void append_grep_pattern(struct grep_opt *opt, const char *pat,
184 const char *origin, int no, enum grep_pat_token t)
186 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
189 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
190 const char *origin, int no, enum grep_pat_token t)
192 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
193 do_append_grep_pat(&opt->pattern_tail, p);
196 struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
198 struct grep_pat *pat;
199 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
200 *ret = *opt;
202 ret->pattern_list = NULL;
203 ret->pattern_tail = &ret->pattern_list;
205 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
207 if(pat->token == GREP_PATTERN_HEAD)
208 append_header_grep_pattern(ret, pat->field,
209 pat->pattern);
210 else
211 append_grep_pat(ret, pat->pattern, pat->patternlen,
212 pat->origin, pat->no, pat->token);
215 return ret;
218 static NORETURN void compile_regexp_failed(const struct grep_pat *p,
219 const char *error)
221 char where[1024];
223 if (p->no)
224 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
225 else if (p->origin)
226 xsnprintf(where, sizeof(where), "%s, ", p->origin);
227 else
228 where[0] = 0;
230 die("%s'%s': %s", where, p->pattern, error);
/*
 * Return 1 when none of the first `len` bytes of `s` satisfies
 * is_regex_special(), 0 as soon as one does.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;

	while (s < end) {
		if (is_regex_special(*s))
			return 0;
		s++;
	}
	return 1;
}
245 #ifdef USE_LIBPCRE2
246 #define GREP_PCRE2_DEBUG_MALLOC 0
248 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
250 void *pointer = malloc(size);
251 #if GREP_PCRE2_DEBUG_MALLOC
252 static int count = 1;
253 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
254 #endif
255 return pointer;
258 static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
260 #if GREP_PCRE2_DEBUG_MALLOC
261 static int count = 1;
262 if (pointer)
263 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
264 #endif
265 free(pointer);
268 static int pcre2_jit_functional(void)
270 static int jit_working = -1;
271 pcre2_code *code;
272 size_t off;
273 int err;
275 if (jit_working != -1)
276 return jit_working;
279 * Try to JIT compile a simple pattern to probe if the JIT is
280 * working in general. It might fail for systems where creating
281 * memory mappings for runtime code generation is restricted.
283 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
284 if (!code)
285 return 0;
287 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
288 pcre2_code_free(code);
290 return jit_working;
293 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
295 int error;
296 PCRE2_UCHAR errbuf[256];
297 PCRE2_SIZE erroffset;
298 int options = PCRE2_MULTILINE;
299 int jitret;
300 int patinforet;
301 size_t jitsizearg;
302 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
305 * Call pcre2_general_context_create() before calling any
306 * other pcre2_*(). It sets up our malloc()/free() functions
307 * with which everything else is allocated.
309 p->pcre2_general_context = pcre2_general_context_create(
310 pcre2_malloc, pcre2_free, NULL);
311 if (!p->pcre2_general_context)
312 die("Couldn't allocate PCRE2 general context");
314 if (opt->ignore_case) {
315 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
316 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
317 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
318 pcre2_set_character_tables(p->pcre2_compile_context,
319 p->pcre2_tables);
321 options |= PCRE2_CASELESS;
323 if (!opt->ignore_locale && is_utf8_locale() && !literal)
324 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
326 #ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
327 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
328 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
329 options |= PCRE2_NO_START_OPTIMIZE;
330 #endif
332 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
333 p->patternlen, options, &error, &erroffset,
334 p->pcre2_compile_context);
336 if (p->pcre2_pattern) {
337 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
338 if (!p->pcre2_match_data)
339 die("Couldn't allocate PCRE2 match data");
340 } else {
341 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
342 compile_regexp_failed(p, (const char *)&errbuf);
345 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
346 if (p->pcre2_jit_on) {
347 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
348 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
350 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
351 * indicated JIT support, the library might still
352 * fail to generate JIT code for various reasons,
353 * e.g. when SELinux's 'deny_execmem' or PaX's
354 * MPROTECT prevent creating W|X memory mappings.
356 * Instead of faling hard, fall back to interpreter
357 * mode, just as if the pattern was prefixed with
358 * '(*NO_JIT)'.
360 p->pcre2_jit_on = 0;
361 return;
362 } else if (jitret) {
363 int need_clip = p->patternlen > 64;
364 int clip_len = need_clip ? 64 : p->patternlen;
365 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
366 clip_len, p->pattern, need_clip ? "..." : "", jitret,
367 pcre2_jit_functional()
368 ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
369 : "");
373 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
374 * tells us whether the library itself supports JIT,
375 * but to see whether we're going to be actually using
376 * JIT we need to extract PCRE2_INFO_JITSIZE from the
377 * pattern *after* we do pcre2_jit_compile() above.
379 * This is because if the pattern contains the
380 * (*NO_JIT) verb (see pcre2syntax(3))
381 * pcre2_jit_compile() will exit early with 0. If we
382 * then proceed to call pcre2_jit_match() further down
383 * the line instead of pcre2_match() we'll either
384 * segfault (pre PCRE 10.31) or run into a fatal error
385 * (post PCRE2 10.31)
387 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
388 if (patinforet)
389 BUG("pcre2_pattern_info() failed: %d", patinforet);
390 if (jitsizearg == 0) {
391 p->pcre2_jit_on = 0;
392 return;
397 static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
398 regmatch_t *match, int eflags)
400 int ret, flags = 0;
401 PCRE2_SIZE *ovector;
402 PCRE2_UCHAR errbuf[256];
404 if (eflags & REG_NOTBOL)
405 flags |= PCRE2_NOTBOL;
407 if (p->pcre2_jit_on)
408 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
409 eol - line, 0, flags, p->pcre2_match_data,
410 NULL);
411 else
412 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
413 eol - line, 0, flags, p->pcre2_match_data,
414 NULL);
416 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
417 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
418 die("%s failed with error code %d: %s",
419 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
420 errbuf);
422 if (ret > 0) {
423 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
424 ret = 0;
425 match->rm_so = (int)ovector[0];
426 match->rm_eo = (int)ovector[1];
429 return ret;
432 static void free_pcre2_pattern(struct grep_pat *p)
434 pcre2_compile_context_free(p->pcre2_compile_context);
435 pcre2_code_free(p->pcre2_pattern);
436 pcre2_match_data_free(p->pcre2_match_data);
437 #ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
438 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
439 #else
440 free((void *)p->pcre2_tables);
441 #endif
442 pcre2_general_context_free(p->pcre2_general_context);
444 #else /* !USE_LIBPCRE2 */
/* Variant for builds without USE_LIBPCRE2: PCRE patterns are a fatal error. */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
/*
 * Variant for builds without USE_LIBPCRE2: always returns 1, which
 * patmatch() treats as "no match".
 */
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
		      regmatch_t *match, int eflags)
{
	return 1;
}
/* Variant for builds without USE_LIBPCRE2: there is nothing to release. */
static void free_pcre2_pattern(struct grep_pat *p)
{
}
460 static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
462 struct strbuf sb = STRBUF_INIT;
463 int err;
464 int regflags = 0;
466 basic_regex_quote_buf(&sb, p->pattern);
467 if (opt->ignore_case)
468 regflags |= REG_ICASE;
469 err = regcomp(&p->regexp, sb.buf, regflags);
470 strbuf_release(&sb);
471 if (err) {
472 char errbuf[1024];
473 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
474 compile_regexp_failed(p, errbuf);
477 #endif /* !USE_LIBPCRE2 */
479 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
481 int err;
482 int regflags = REG_NEWLINE;
484 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
485 opt->pattern_type_option = (opt->extended_regexp_option
486 ? GREP_PATTERN_TYPE_ERE
487 : GREP_PATTERN_TYPE_BRE);
489 p->word_regexp = opt->word_regexp;
490 p->ignore_case = opt->ignore_case;
491 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
493 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
494 memchr(p->pattern, 0, p->patternlen))
495 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
497 p->is_fixed = is_fixed(p->pattern, p->patternlen);
498 #ifdef USE_LIBPCRE2
499 if (!p->fixed && !p->is_fixed) {
500 const char *no_jit = "(*NO_JIT)";
501 const int no_jit_len = strlen(no_jit);
502 if (starts_with(p->pattern, no_jit) &&
503 is_fixed(p->pattern + no_jit_len,
504 p->patternlen - no_jit_len))
505 p->is_fixed = 1;
507 #endif
508 if (p->fixed || p->is_fixed) {
509 #ifdef USE_LIBPCRE2
510 if (p->is_fixed) {
511 compile_pcre2_pattern(p, opt);
512 } else {
514 * E.g. t7811-grep-open.sh relies on the
515 * pattern being restored.
517 char *old_pattern = p->pattern;
518 size_t old_patternlen = p->patternlen;
519 struct strbuf sb = STRBUF_INIT;
522 * There is the PCRE2_LITERAL flag, but it's
523 * only in PCRE v2 10.30 and later. Needing to
524 * ifdef our way around that and dealing with
525 * it + PCRE2_MULTILINE being an error is more
526 * complex than just quoting this ourselves.
528 strbuf_add(&sb, "\\Q", 2);
529 strbuf_add(&sb, p->pattern, p->patternlen);
530 strbuf_add(&sb, "\\E", 2);
532 p->pattern = sb.buf;
533 p->patternlen = sb.len;
534 compile_pcre2_pattern(p, opt);
535 p->pattern = old_pattern;
536 p->patternlen = old_patternlen;
537 strbuf_release(&sb);
539 #else /* !USE_LIBPCRE2 */
540 compile_fixed_regexp(p, opt);
541 #endif /* !USE_LIBPCRE2 */
542 return;
545 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
546 compile_pcre2_pattern(p, opt);
547 return;
550 if (p->ignore_case)
551 regflags |= REG_ICASE;
552 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
553 regflags |= REG_EXTENDED;
554 err = regcomp(&p->regexp, p->pattern, regflags);
555 if (err) {
556 char errbuf[1024];
557 regerror(err, &p->regexp, errbuf, 1024);
558 compile_regexp_failed(p, errbuf);
562 static struct grep_expr *grep_not_expr(struct grep_expr *expr)
564 struct grep_expr *z = xcalloc(1, sizeof(*z));
565 z->node = GREP_NODE_NOT;
566 z->u.unary = expr;
567 return z;
570 static struct grep_expr *grep_binexp(enum grep_expr_node kind,
571 struct grep_expr *left,
572 struct grep_expr *right)
574 struct grep_expr *z = xcalloc(1, sizeof(*z));
575 z->node = kind;
576 z->u.binary.left = left;
577 z->u.binary.right = right;
578 return z;
581 static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
583 return grep_binexp(GREP_NODE_OR, left, right);
586 static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
588 return grep_binexp(GREP_NODE_AND, left, right);
591 static struct grep_expr *compile_pattern_or(struct grep_pat **);
592 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
594 struct grep_pat *p;
595 struct grep_expr *x;
597 p = *list;
598 if (!p)
599 return NULL;
600 switch (p->token) {
601 case GREP_PATTERN: /* atom */
602 case GREP_PATTERN_HEAD:
603 case GREP_PATTERN_BODY:
604 CALLOC_ARRAY(x, 1);
605 x->node = GREP_NODE_ATOM;
606 x->u.atom = p;
607 *list = p->next;
608 return x;
609 case GREP_OPEN_PAREN:
610 *list = p->next;
611 x = compile_pattern_or(list);
612 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
613 die("unmatched parenthesis");
614 *list = (*list)->next;
615 return x;
616 default:
617 return NULL;
621 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
623 struct grep_pat *p;
624 struct grep_expr *x;
626 p = *list;
627 if (!p)
628 return NULL;
629 switch (p->token) {
630 case GREP_NOT:
631 if (!p->next)
632 die("--not not followed by pattern expression");
633 *list = p->next;
634 x = compile_pattern_not(list);
635 if (!x)
636 die("--not followed by non pattern expression");
637 return grep_not_expr(x);
638 default:
639 return compile_pattern_atom(list);
643 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
645 struct grep_pat *p;
646 struct grep_expr *x, *y;
648 x = compile_pattern_not(list);
649 p = *list;
650 if (p && p->token == GREP_AND) {
651 if (!x)
652 die("--and not preceded by pattern expression");
653 if (!p->next)
654 die("--and not followed by pattern expression");
655 *list = p->next;
656 y = compile_pattern_and(list);
657 if (!y)
658 die("--and not followed by pattern expression");
659 return grep_and_expr(x, y);
661 return x;
664 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
666 struct grep_pat *p;
667 struct grep_expr *x, *y;
669 x = compile_pattern_and(list);
670 p = *list;
671 if (x && p && p->token != GREP_CLOSE_PAREN) {
672 y = compile_pattern_or(list);
673 if (!y)
674 die("not a pattern expression %s", p->pattern);
675 return grep_or_expr(x, y);
677 return x;
/* Entry point of the expression parser: a full expression is an OR-sequence. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
685 static struct grep_expr *grep_true_expr(void)
687 struct grep_expr *z = xcalloc(1, sizeof(*z));
688 z->node = GREP_NODE_TRUE;
689 return z;
692 static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
694 struct grep_pat *p;
695 struct grep_expr *header_expr;
696 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
697 enum grep_header_field fld;
699 if (!opt->header_list)
700 return NULL;
702 for (p = opt->header_list; p; p = p->next) {
703 if (p->token != GREP_PATTERN_HEAD)
704 BUG("a non-header pattern in grep header list.");
705 if (p->field < GREP_HEADER_FIELD_MIN ||
706 GREP_HEADER_FIELD_MAX <= p->field)
707 BUG("unknown header field %d", p->field);
708 compile_regexp(p, opt);
711 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
712 header_group[fld] = NULL;
714 for (p = opt->header_list; p; p = p->next) {
715 struct grep_expr *h;
716 struct grep_pat *pp = p;
718 h = compile_pattern_atom(&pp);
719 if (!h || pp != p->next)
720 BUG("malformed header expr");
721 if (!header_group[p->field]) {
722 header_group[p->field] = h;
723 continue;
725 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
728 header_expr = NULL;
730 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
731 if (!header_group[fld])
732 continue;
733 if (!header_expr)
734 header_expr = grep_true_expr();
735 header_expr = grep_or_expr(header_group[fld], header_expr);
737 return header_expr;
740 static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
742 struct grep_expr *z = x;
744 while (x) {
745 assert(x->node == GREP_NODE_OR);
746 if (x->u.binary.right &&
747 x->u.binary.right->node == GREP_NODE_TRUE) {
748 x->u.binary.right = y;
749 break;
751 x = x->u.binary.right;
753 return z;
756 void compile_grep_patterns(struct grep_opt *opt)
758 struct grep_pat *p;
759 struct grep_expr *header_expr = prep_header_patterns(opt);
760 int extended = 0;
762 for (p = opt->pattern_list; p; p = p->next) {
763 switch (p->token) {
764 case GREP_PATTERN: /* atom */
765 case GREP_PATTERN_HEAD:
766 case GREP_PATTERN_BODY:
767 compile_regexp(p, opt);
768 break;
769 default:
770 extended = 1;
771 break;
775 if (opt->all_match || opt->no_body_match || header_expr)
776 extended = 1;
777 else if (!extended)
778 return;
780 p = opt->pattern_list;
781 if (p)
782 opt->pattern_expression = compile_pattern_expr(&p);
783 if (p)
784 die("incomplete pattern expression: %s", p->pattern);
786 if (opt->no_body_match && opt->pattern_expression)
787 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
789 if (!header_expr)
790 return;
792 if (!opt->pattern_expression)
793 opt->pattern_expression = header_expr;
794 else if (opt->all_match)
795 opt->pattern_expression = grep_splice_or(header_expr,
796 opt->pattern_expression);
797 else
798 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
799 header_expr);
800 opt->all_match = 1;
803 static void free_pattern_expr(struct grep_expr *x)
805 switch (x->node) {
806 case GREP_NODE_TRUE:
807 case GREP_NODE_ATOM:
808 break;
809 case GREP_NODE_NOT:
810 free_pattern_expr(x->u.unary);
811 break;
812 case GREP_NODE_AND:
813 case GREP_NODE_OR:
814 free_pattern_expr(x->u.binary.left);
815 free_pattern_expr(x->u.binary.right);
816 break;
818 free(x);
821 static void free_grep_pat(struct grep_pat *pattern)
823 struct grep_pat *p, *n;
825 for (p = pattern; p; p = n) {
826 n = p->next;
827 switch (p->token) {
828 case GREP_PATTERN: /* atom */
829 case GREP_PATTERN_HEAD:
830 case GREP_PATTERN_BODY:
831 if (p->pcre2_pattern)
832 free_pcre2_pattern(p);
833 else
834 regfree(&p->regexp);
835 free(p->pattern);
836 break;
837 default:
838 break;
840 free(p);
844 void free_grep_patterns(struct grep_opt *opt)
846 free_grep_pat(opt->pattern_list);
847 free_grep_pat(opt->header_list);
849 if (opt->pattern_expression)
850 free_pattern_expr(opt->pattern_expression);
/*
 * Advance from `cp` to the next '\n' or until `*left` bytes have been
 * consumed, whichever comes first.  Returns the position reached and
 * stores the number of bytes remaining from there in `*left`.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining; remaining--, cp++) {
		if (*cp == '\n')
			break;
	}
	*left = remaining;
	return cp;
}
/*
 * Return non-zero when `ch` is a word character: alphanumeric or '_'.
 *
 * The value is converted to unsigned char before it reaches isalnum(),
 * because the standard <ctype.h> classification functions are undefined
 * for negative arguments other than EOF, and plain char may be signed.
 */
static int word_char(char ch)
{
	return isalnum((unsigned char)ch) || ch == '_';
}
869 static void output_color(struct grep_opt *opt, const void *data, size_t size,
870 const char *color)
872 if (want_color(opt->color) && color && color[0]) {
873 opt->output(opt, color, strlen(color));
874 opt->output(opt, data, size);
875 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
876 } else
877 opt->output(opt, data, size);
880 static void output_sep(struct grep_opt *opt, char sign)
882 if (opt->null_following_name)
883 opt->output(opt, "\0", 1);
884 else
885 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
888 static void show_name(struct grep_opt *opt, const char *name)
890 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
891 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
894 static int patmatch(struct grep_pat *p,
895 const char *line, const char *eol,
896 regmatch_t *match, int eflags)
898 int hit;
900 if (p->pcre2_pattern)
901 hit = !pcre2match(p, line, eol, match, eflags);
902 else
903 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
904 eflags);
906 return hit;
/*
 * Scan backwards from just before `*eol_p` towards `bol` for a '>'.
 * If one is found, `*eol_p` is moved to just past it, cutting off
 * whatever followed.  The byte at `bol` itself is never examined, and
 * `*eol_p` is left untouched when no '>' is found.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cur;

	for (cur = *eol_p - 1; bol < cur; cur--) {
		if (*cur == '>') {
			*eol_p = cur + 1;
			return;
		}
	}
}
/*
 * Header line prefixes and their lengths.  match_one_pattern() indexes
 * this table with a pattern's grep_header_field value.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ",     7 },
	{ "committer ", 10 },
	{ "reflog ",     7 },
};
930 static int headerless_match_one_pattern(struct grep_pat *p,
931 const char *bol, const char *eol,
932 enum grep_context ctx,
933 regmatch_t *pmatch, int eflags)
935 int hit = 0;
936 const char *start = bol;
938 if ((p->token != GREP_PATTERN) &&
939 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
940 return 0;
942 again:
943 hit = patmatch(p, bol, eol, pmatch, eflags);
945 if (hit && p->word_regexp) {
946 if ((pmatch[0].rm_so < 0) ||
947 (eol - bol) < pmatch[0].rm_so ||
948 (pmatch[0].rm_eo < 0) ||
949 (eol - bol) < pmatch[0].rm_eo)
950 die("regexp returned nonsense");
952 /* Match beginning must be either beginning of the
953 * line, or at word boundary (i.e. the last char must
954 * not be a word char). Similarly, match end must be
955 * either end of the line, or at word boundary
956 * (i.e. the next char must not be a word char).
958 if ( ((pmatch[0].rm_so == 0) ||
959 !word_char(bol[pmatch[0].rm_so-1])) &&
960 ((pmatch[0].rm_eo == (eol-bol)) ||
961 !word_char(bol[pmatch[0].rm_eo])) )
963 else
964 hit = 0;
966 /* Words consist of at least one character. */
967 if (pmatch->rm_so == pmatch->rm_eo)
968 hit = 0;
970 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
971 /* There could be more than one match on the
972 * line, and the first match might not be
973 * strict word match. But later ones could be!
974 * Forward to the next possible start, i.e. the
975 * next position following a non-word char.
977 bol = pmatch[0].rm_so + bol + 1;
978 while (word_char(bol[-1]) && bol < eol)
979 bol++;
980 eflags |= REG_NOTBOL;
981 if (bol < eol)
982 goto again;
985 if (hit) {
986 pmatch[0].rm_so += bol - start;
987 pmatch[0].rm_eo += bol - start;
989 return hit;
992 static int match_one_pattern(struct grep_pat *p,
993 const char *bol, const char *eol,
994 enum grep_context ctx, regmatch_t *pmatch,
995 int eflags)
997 const char *field;
998 size_t len;
1000 if (p->token == GREP_PATTERN_HEAD) {
1001 assert(p->field < ARRAY_SIZE(header_field));
1002 field = header_field[p->field].field;
1003 len = header_field[p->field].len;
1004 if (strncmp(bol, field, len))
1005 return 0;
1006 bol += len;
1008 switch (p->field) {
1009 case GREP_HEADER_AUTHOR:
1010 case GREP_HEADER_COMMITTER:
1011 strip_timestamp(bol, &eol);
1012 break;
1013 default:
1014 break;
1018 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1022 static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1023 const char *bol, const char *eol,
1024 enum grep_context ctx, ssize_t *col,
1025 ssize_t *icol, int collect_hits)
1027 int h = 0;
1029 switch (x->node) {
1030 case GREP_NODE_TRUE:
1031 h = 1;
1032 break;
1033 case GREP_NODE_ATOM:
1035 regmatch_t tmp;
1036 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1037 &tmp, 0);
1038 if (h && (*col < 0 || tmp.rm_so < *col))
1039 *col = tmp.rm_so;
1041 if (x->u.atom->token == GREP_PATTERN_BODY)
1042 opt->body_hit |= h;
1043 break;
1044 case GREP_NODE_NOT:
1046 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1048 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1050 break;
1051 case GREP_NODE_AND:
1052 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1053 icol, 0);
1054 if (h || opt->columnnum) {
1056 * Don't short-circuit AND when given --column, since a
1057 * NOT earlier in the tree may turn this into an OR. In
1058 * this case, see the below comment.
1060 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1061 ctx, col, icol, 0);
1063 break;
1064 case GREP_NODE_OR:
1065 if (!(collect_hits || opt->columnnum)) {
1067 * Don't short-circuit OR when given --column (or
1068 * collecting hits) to ensure we don't skip a later
1069 * child that would produce an earlier match.
1071 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1072 ctx, col, icol, 0) ||
1073 match_expr_eval(opt, x->u.binary.right, bol,
1074 eol, ctx, col, icol, 0));
1076 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1077 icol, 0);
1078 if (collect_hits)
1079 x->u.binary.left->hit |= h;
1080 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
1081 icol, collect_hits);
1082 break;
1083 default:
1084 die("Unexpected node type (internal error) %d", x->node);
1086 if (collect_hits)
1087 x->hit |= h;
1088 return h;
1091 static int match_expr(struct grep_opt *opt,
1092 const char *bol, const char *eol,
1093 enum grep_context ctx, ssize_t *col,
1094 ssize_t *icol, int collect_hits)
1096 struct grep_expr *x = opt->pattern_expression;
1097 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1100 static int match_line(struct grep_opt *opt,
1101 const char *bol, const char *eol,
1102 ssize_t *col, ssize_t *icol,
1103 enum grep_context ctx, int collect_hits)
1105 struct grep_pat *p;
1106 int hit = 0;
1108 if (opt->pattern_expression)
1109 return match_expr(opt, bol, eol, ctx, col, icol,
1110 collect_hits);
1112 /* we do not call with collect_hits without being extended */
1113 for (p = opt->pattern_list; p; p = p->next) {
1114 regmatch_t tmp;
1115 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1116 hit |= 1;
1117 if (!opt->columnnum) {
1119 * Without --column, any single match on a line
1120 * is enough to know that it needs to be
1121 * printed. With --column, scan _all_ patterns
1122 * to find the earliest.
1124 break;
1126 if (*col < 0 || tmp.rm_so < *col)
1127 *col = tmp.rm_so;
1130 return hit;
1133 static int match_next_pattern(struct grep_pat *p,
1134 const char *bol, const char *eol,
1135 enum grep_context ctx,
1136 regmatch_t *pmatch, int eflags)
1138 regmatch_t match;
1140 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
1141 return 0;
1142 if (match.rm_so < 0 || match.rm_eo < 0)
1143 return 0;
1144 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1145 if (match.rm_so > pmatch->rm_so)
1146 return 1;
1147 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1148 return 1;
1150 pmatch->rm_so = match.rm_so;
1151 pmatch->rm_eo = match.rm_eo;
1152 return 1;
1155 int grep_next_match(struct grep_opt *opt,
1156 const char *bol, const char *eol,
1157 enum grep_context ctx, regmatch_t *pmatch,
1158 enum grep_header_field field, int eflags)
1160 struct grep_pat *p;
1161 int hit = 0;
1163 pmatch->rm_so = pmatch->rm_eo = -1;
1164 if (bol < eol) {
1165 for (p = ((ctx == GREP_CONTEXT_HEAD)
1166 ? opt->header_list : opt->pattern_list);
1167 p; p = p->next) {
1168 switch (p->token) {
1169 case GREP_PATTERN_HEAD:
1170 if ((field != GREP_HEADER_FIELD_MAX) &&
1171 (p->field != field))
1172 continue;
1173 /* fall thru */
1174 case GREP_PATTERN: /* atom */
1175 case GREP_PATTERN_BODY:
1176 hit |= match_next_pattern(p, bol, eol, ctx,
1177 pmatch, eflags);
1178 break;
1179 default:
1180 break;
1184 return hit;
1187 static void show_line_header(struct grep_opt *opt, const char *name,
1188 unsigned lno, ssize_t cno, char sign)
1190 if (opt->heading && opt->last_shown == 0) {
1191 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1192 opt->output(opt, "\n", 1);
1194 opt->last_shown = lno;
1196 if (!opt->heading && opt->pathname) {
1197 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1198 output_sep(opt, sign);
1200 if (opt->linenum) {
1201 char buf[32];
1202 xsnprintf(buf, sizeof(buf), "%d", lno);
1203 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
1204 output_sep(opt, sign);
1207 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1208 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1209 * being called with a context line.
1211 if (opt->columnnum && cno) {
1212 char buf[32];
1213 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1214 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
1215 output_sep(opt, sign);
1219 static void show_line(struct grep_opt *opt,
1220 const char *bol, const char *eol,
1221 const char *name, unsigned lno, ssize_t cno, char sign)
1223 int rest = eol - bol;
1224 const char *match_color = NULL;
1225 const char *line_color = NULL;
1227 if (opt->file_break && opt->last_shown == 0) {
1228 if (opt->show_hunk_mark)
1229 opt->output(opt, "\n", 1);
1230 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1231 if (opt->last_shown == 0) {
1232 if (opt->show_hunk_mark) {
1233 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1234 opt->output(opt, "\n", 1);
1236 } else if (lno > opt->last_shown + 1) {
1237 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
1238 opt->output(opt, "\n", 1);
1241 if (!opt->only_matching) {
1243 * In case the line we're being called with contains more than
1244 * one match, leave printing each header to the loop below.
1246 show_line_header(opt, name, lno, cno, sign);
1248 if (opt->color || opt->only_matching) {
1249 regmatch_t match;
1250 enum grep_context ctx = GREP_CONTEXT_BODY;
1251 int eflags = 0;
1253 if (opt->color) {
1254 if (sign == ':')
1255 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
1256 else
1257 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
1258 if (sign == ':')
1259 line_color = opt->colors[GREP_COLOR_SELECTED];
1260 else if (sign == '-')
1261 line_color = opt->colors[GREP_COLOR_CONTEXT];
1262 else if (sign == '=')
1263 line_color = opt->colors[GREP_COLOR_FUNCTION];
1265 while (grep_next_match(opt, bol, eol, ctx, &match,
1266 GREP_HEADER_FIELD_MAX, eflags)) {
1267 if (match.rm_so == match.rm_eo)
1268 break;
1270 if (opt->only_matching)
1271 show_line_header(opt, name, lno, cno, sign);
1272 else
1273 output_color(opt, bol, match.rm_so, line_color);
1274 output_color(opt, bol + match.rm_so,
1275 match.rm_eo - match.rm_so, match_color);
1276 if (opt->only_matching)
1277 opt->output(opt, "\n", 1);
1278 bol += match.rm_eo;
1279 cno += match.rm_eo;
1280 rest -= match.rm_eo;
1281 eflags = REG_NOTBOL;
1284 if (!opt->only_matching) {
1285 output_color(opt, bol, rest, line_color);
1286 opt->output(opt, "\n", 1);
/* grep_attr_lock()/grep_attr_unlock() only take the mutex when this is set. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;
1298 static inline void grep_attr_lock(void)
1300 if (grep_use_locks)
1301 pthread_mutex_lock(&grep_attr_mutex);
1304 static inline void grep_attr_unlock(void)
1306 if (grep_use_locks)
1307 pthread_mutex_unlock(&grep_attr_mutex);
1310 static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1311 const char *bol, const char *eol)
1313 xdemitconf_t *xecfg = opt->priv;
1314 if (xecfg && !xecfg->find_func) {
1315 grep_source_load_driver(gs, opt->repo->index);
1316 if (gs->driver->funcname.pattern) {
1317 const struct userdiff_funcname *pe = &gs->driver->funcname;
1318 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1319 } else {
1320 xecfg = opt->priv = NULL;
1324 if (xecfg) {
1325 char buf[1];
1326 return xecfg->find_func(bol, eol - bol, buf, 1,
1327 xecfg->find_func_priv) >= 0;
1330 if (bol == eol)
1331 return 0;
1332 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1333 return 1;
1334 return 0;
1337 static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1338 const char *bol, unsigned lno)
1340 while (bol > gs->buf) {
1341 const char *eol = --bol;
1343 while (bol > gs->buf && bol[-1] != '\n')
1344 bol--;
1345 lno--;
1347 if (lno <= opt->last_shown)
1348 break;
1350 if (match_funcname(opt, gs, bol, eol)) {
1351 show_line(opt, bol, eol, gs->name, lno, 0, '=');
1352 break;
/* Forward declaration; the definition appears later in this file. */
static int is_empty_line(const char *bol, const char *eol);
1359 static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1360 const char *bol, const char *end, unsigned lno)
1362 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
1363 int funcname_needed = !!opt->funcname, comment_needed = 0;
1365 if (opt->pre_context < lno)
1366 from = lno - opt->pre_context;
1367 if (from <= opt->last_shown)
1368 from = opt->last_shown + 1;
1369 orig_from = from;
1370 if (opt->funcbody) {
1371 if (match_funcname(opt, gs, bol, end))
1372 comment_needed = 1;
1373 else
1374 funcname_needed = 1;
1375 from = opt->last_shown + 1;
1378 /* Rewind. */
1379 while (bol > gs->buf && cur > from) {
1380 const char *next_bol = bol;
1381 const char *eol = --bol;
1383 while (bol > gs->buf && bol[-1] != '\n')
1384 bol--;
1385 cur--;
1386 if (comment_needed && (is_empty_line(bol, eol) ||
1387 match_funcname(opt, gs, bol, eol))) {
1388 comment_needed = 0;
1389 from = orig_from;
1390 if (cur < from) {
1391 cur++;
1392 bol = next_bol;
1393 break;
1396 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1397 funcname_lno = cur;
1398 funcname_needed = 0;
1399 if (opt->funcbody)
1400 comment_needed = 1;
1401 else
1402 from = orig_from;
1406 /* We need to look even further back to find a function signature. */
1407 if (opt->funcname && funcname_needed)
1408 show_funcname_line(opt, gs, bol, cur);
1410 /* Back forward. */
1411 while (cur < lno) {
1412 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1414 while (*eol != '\n')
1415 eol++;
1416 show_line(opt, bol, eol, gs->name, cur, 0, sign);
1417 bol = eol + 1;
1418 cur++;
1422 static int should_lookahead(struct grep_opt *opt)
1424 struct grep_pat *p;
1426 if (opt->pattern_expression)
1427 return 0; /* punt for too complex stuff */
1428 if (opt->invert)
1429 return 0;
1430 for (p = opt->pattern_list; p; p = p->next) {
1431 if (p->token != GREP_PATTERN)
1432 return 0; /* punt for "header only" and stuff */
1434 return 1;
1437 static int look_ahead(struct grep_opt *opt,
1438 unsigned long *left_p,
1439 unsigned *lno_p,
1440 const char **bol_p)
1442 unsigned lno = *lno_p;
1443 const char *bol = *bol_p;
1444 struct grep_pat *p;
1445 const char *sp, *last_bol;
1446 regoff_t earliest = -1;
1448 for (p = opt->pattern_list; p; p = p->next) {
1449 int hit;
1450 regmatch_t m;
1452 hit = patmatch(p, bol, bol + *left_p, &m, 0);
1453 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1454 continue;
1455 if (earliest < 0 || m.rm_so < earliest)
1456 earliest = m.rm_so;
1459 if (earliest < 0) {
1460 *bol_p = bol + *left_p;
1461 *left_p = 0;
1462 return 1;
1464 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1465 ; /* find the beginning of the line */
1466 last_bol = sp;
1468 for (sp = bol; sp < last_bol; sp++) {
1469 if (*sp == '\n')
1470 lno++;
1472 *left_p -= last_bol - bol;
1473 *bol_p = last_bol;
1474 *lno_p = lno;
1475 return 0;
1478 static int fill_textconv_grep(struct repository *r,
1479 struct userdiff_driver *driver,
1480 struct grep_source *gs)
1482 struct diff_filespec *df;
1483 char *buf;
1484 size_t size;
1486 if (!driver || !driver->textconv)
1487 return grep_source_load(gs);
1490 * The textconv interface is intimately tied to diff_filespecs, so we
1491 * have to pretend to be one. If we could unify the grep_source
1492 * and diff_filespec structs, this mess could just go away.
1494 df = alloc_filespec(gs->path);
1495 switch (gs->type) {
1496 case GREP_SOURCE_OID:
1497 fill_filespec(df, gs->identifier, 1, 0100644);
1498 break;
1499 case GREP_SOURCE_FILE:
1500 fill_filespec(df, null_oid(), 0, 0100644);
1501 break;
1502 default:
1503 BUG("attempt to textconv something without a path?");
1507 * fill_textconv is not remotely thread-safe; it modifies the global
1508 * diff tempfile structure, writes to the_repo's odb and might
1509 * internally call thread-unsafe functions such as the
1510 * prepare_packed_git() lazy-initializator. Because of the last two, we
1511 * must ensure mutual exclusion between this call and the object reading
1512 * API, thus we use obj_read_lock() here.
1514 * TODO: allowing text conversion to run in parallel with object
1515 * reading operations might increase performance in the multithreaded
1516 * non-worktreee git-grep with --textconv.
1518 obj_read_lock();
1519 size = fill_textconv(r, driver, df, &buf);
1520 obj_read_unlock();
1521 free_filespec(df);
1524 * The normal fill_textconv usage by the diff machinery would just keep
1525 * the textconv'd buf separate from the diff_filespec. But much of the
1526 * grep code passes around a grep_source and assumes that its "buf"
1527 * pointer is the beginning of the thing we are searching. So let's
1528 * install our textconv'd version into the grep_source, taking care not
1529 * to leak any existing buffer.
1531 grep_source_clear_data(gs);
1532 gs->buf = buf;
1533 gs->size = size;
1535 return 0;
/*
 * Return 1 when [bol, eol) contains nothing but whitespace (an empty
 * range counts as empty), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p;

	for (p = bol; p < eol; p++)
		if (!isspace(*p))
			break;
	return p == eol;
}
1545 static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1547 const char *bol;
1548 const char *peek_bol = NULL;
1549 unsigned long left;
1550 unsigned lno = 1;
1551 unsigned last_hit = 0;
1552 int binary_match_only = 0;
1553 unsigned count = 0;
1554 int try_lookahead = 0;
1555 int show_function = 0;
1556 struct userdiff_driver *textconv = NULL;
1557 enum grep_context ctx = GREP_CONTEXT_HEAD;
1558 xdemitconf_t xecfg;
1560 if (!opt->status_only && gs->name == NULL)
1561 BUG("grep call which could print a name requires "
1562 "grep_source.name be non-NULL");
1564 if (!opt->output)
1565 opt->output = std_output;
1567 if (opt->pre_context || opt->post_context || opt->file_break ||
1568 opt->funcbody) {
1569 /* Show hunk marks, except for the first file. */
1570 if (opt->last_shown)
1571 opt->show_hunk_mark = 1;
1573 * If we're using threads then we can't easily identify
1574 * the first file. Always put hunk marks in that case
1575 * and skip the very first one later in work_done().
1577 if (opt->output != std_output)
1578 opt->show_hunk_mark = 1;
1580 opt->last_shown = 0;
1582 if (opt->allow_textconv) {
1583 grep_source_load_driver(gs, opt->repo->index);
1585 * We might set up the shared textconv cache data here, which
1586 * is not thread-safe. Also, get_oid_with_context() and
1587 * parse_object() might be internally called. As they are not
1588 * currently thread-safe and might be racy with object reading,
1589 * obj_read_lock() must be called.
1591 grep_attr_lock();
1592 obj_read_lock();
1593 textconv = userdiff_get_textconv(opt->repo, gs->driver);
1594 obj_read_unlock();
1595 grep_attr_unlock();
1599 * We know the result of a textconv is text, so we only have to care
1600 * about binary handling if we are not using it.
1602 if (!textconv) {
1603 switch (opt->binary) {
1604 case GREP_BINARY_DEFAULT:
1605 if (grep_source_is_binary(gs, opt->repo->index))
1606 binary_match_only = 1;
1607 break;
1608 case GREP_BINARY_NOMATCH:
1609 if (grep_source_is_binary(gs, opt->repo->index))
1610 return 0; /* Assume unmatch */
1611 break;
1612 case GREP_BINARY_TEXT:
1613 break;
1614 default:
1615 BUG("unknown binary handling mode");
1619 memset(&xecfg, 0, sizeof(xecfg));
1620 opt->priv = &xecfg;
1622 try_lookahead = should_lookahead(opt);
1624 if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
1625 return 0;
1627 bol = gs->buf;
1628 left = gs->size;
1629 while (left) {
1630 const char *eol;
1631 int hit;
1632 ssize_t cno;
1633 ssize_t col = -1, icol = -1;
1636 * look_ahead() skips quickly to the line that possibly
1637 * has the next hit; don't call it if we need to do
1638 * something more than just skipping the current line
1639 * in response to an unmatch for the current line. E.g.
1640 * inside a post-context window, we will show the current
1641 * line as a context around the previous hit when it
1642 * doesn't hit.
1644 if (try_lookahead
1645 && !(last_hit
1646 && (show_function ||
1647 lno <= last_hit + opt->post_context))
1648 && look_ahead(opt, &left, &lno, &bol))
1649 break;
1650 eol = end_of_line(bol, &left);
1652 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1653 ctx = GREP_CONTEXT_BODY;
1655 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1657 if (collect_hits)
1658 goto next_line;
1660 /* "grep -v -e foo -e bla" should list lines
1661 * that do not have either, so inversion should
1662 * be done outside.
1664 if (opt->invert)
1665 hit = !hit;
1666 if (opt->unmatch_name_only) {
1667 if (hit)
1668 return 0;
1669 goto next_line;
1671 if (hit && (opt->max_count < 0 || count < opt->max_count)) {
1672 count++;
1673 if (opt->status_only)
1674 return 1;
1675 if (opt->name_only) {
1676 show_name(opt, gs->name);
1677 return 1;
1679 if (opt->count)
1680 goto next_line;
1681 if (binary_match_only) {
1682 opt->output(opt, "Binary file ", 12);
1683 output_color(opt, gs->name, strlen(gs->name),
1684 opt->colors[GREP_COLOR_FILENAME]);
1685 opt->output(opt, " matches\n", 9);
1686 return 1;
1688 /* Hit at this line. If we haven't shown the
1689 * pre-context lines, we would need to show them.
1691 if (opt->pre_context || opt->funcbody)
1692 show_pre_context(opt, gs, bol, eol, lno);
1693 else if (opt->funcname)
1694 show_funcname_line(opt, gs, bol, lno);
1695 cno = opt->invert ? icol : col;
1696 if (cno < 0) {
1698 * A negative cno indicates that there was no
1699 * match on the line. We are thus inverted and
1700 * being asked to show all lines that _don't_
1701 * match a given expression. Therefore, set cno
1702 * to 0 to suggest the whole line matches.
1704 cno = 0;
1706 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1707 last_hit = lno;
1708 if (opt->funcbody)
1709 show_function = 1;
1710 goto next_line;
1712 if (show_function && (!peek_bol || peek_bol < bol)) {
1713 unsigned long peek_left = left;
1714 const char *peek_eol = eol;
1717 * Trailing empty lines are not interesting.
1718 * Peek past them to see if they belong to the
1719 * body of the current function.
1721 peek_bol = bol;
1722 while (is_empty_line(peek_bol, peek_eol)) {
1723 peek_bol = peek_eol + 1;
1724 peek_eol = end_of_line(peek_bol, &peek_left);
1727 if (match_funcname(opt, gs, peek_bol, peek_eol))
1728 show_function = 0;
1730 if (show_function ||
1731 (last_hit && lno <= last_hit + opt->post_context)) {
1732 /* If the last hit is within the post context,
1733 * we need to show this line.
1735 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1738 next_line:
1739 bol = eol + 1;
1740 if (!left)
1741 break;
1742 left--;
1743 lno++;
1746 if (collect_hits)
1747 return 0;
1749 if (opt->status_only)
1750 return opt->unmatch_name_only;
1751 if (opt->unmatch_name_only) {
1752 /* We did not see any hit, so we want to show this */
1753 show_name(opt, gs->name);
1754 return 1;
1757 xdiff_clear_find_func(&xecfg);
1758 opt->priv = NULL;
1760 /* NEEDSWORK:
1761 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1762 * which feels mostly useless but sometimes useful. Maybe
1763 * make it another option? For now suppress them.
1765 if (opt->count && count) {
1766 char buf[32];
1767 if (opt->pathname) {
1768 output_color(opt, gs->name, strlen(gs->name),
1769 opt->colors[GREP_COLOR_FILENAME]);
1770 output_sep(opt, ':');
1772 xsnprintf(buf, sizeof(buf), "%u\n", count);
1773 opt->output(opt, buf, strlen(buf));
1774 return 1;
1776 return !!last_hit;
1779 static void clr_hit_marker(struct grep_expr *x)
1781 /* All-hit markers are meaningful only at the very top level
1782 * OR node.
1784 while (1) {
1785 x->hit = 0;
1786 if (x->node != GREP_NODE_OR)
1787 return;
1788 x->u.binary.left->hit = 0;
1789 x = x->u.binary.right;
1793 static int chk_hit_marker(struct grep_expr *x)
1795 /* Top level nodes have hit markers. See if they all are hits */
1796 while (1) {
1797 if (x->node != GREP_NODE_OR)
1798 return x->hit;
1799 if (!x->u.binary.left->hit)
1800 return 0;
1801 x = x->u.binary.right;
1805 int grep_source(struct grep_opt *opt, struct grep_source *gs)
1808 * we do not have to do the two-pass grep when we do not check
1809 * buffer-wide "all-match".
1811 if (!opt->all_match && !opt->no_body_match)
1812 return grep_source_1(opt, gs, 0);
1814 /* Otherwise the toplevel "or" terms hit a bit differently.
1815 * We first clear hit markers from them.
1817 clr_hit_marker(opt->pattern_expression);
1818 opt->body_hit = 0;
1819 grep_source_1(opt, gs, 1);
1821 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1822 return 0;
1823 if (opt->no_body_match && opt->body_hit)
1824 return 0;
1826 return grep_source_1(opt, gs, 0);
1829 static void grep_source_init_buf(struct grep_source *gs,
1830 const char *buf,
1831 unsigned long size)
1833 gs->type = GREP_SOURCE_BUF;
1834 gs->name = NULL;
1835 gs->path = NULL;
1836 gs->buf = buf;
1837 gs->size = size;
1838 gs->driver = NULL;
1839 gs->identifier = NULL;
1842 int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
1844 struct grep_source gs;
1845 int r;
1847 grep_source_init_buf(&gs, buf, size);
1849 r = grep_source(opt, &gs);
1851 grep_source_clear(&gs);
1852 return r;
1855 void grep_source_init_file(struct grep_source *gs, const char *name,
1856 const char *path)
1858 gs->type = GREP_SOURCE_FILE;
1859 gs->name = xstrdup_or_null(name);
1860 gs->path = xstrdup_or_null(path);
1861 gs->buf = NULL;
1862 gs->size = 0;
1863 gs->driver = NULL;
1864 gs->identifier = xstrdup(path);
1867 void grep_source_init_oid(struct grep_source *gs, const char *name,
1868 const char *path, const struct object_id *oid,
1869 struct repository *repo)
1871 gs->type = GREP_SOURCE_OID;
1872 gs->name = xstrdup_or_null(name);
1873 gs->path = xstrdup_or_null(path);
1874 gs->buf = NULL;
1875 gs->size = 0;
1876 gs->driver = NULL;
1877 gs->identifier = oiddup(oid);
1878 gs->repo = repo;
1881 void grep_source_clear(struct grep_source *gs)
1883 FREE_AND_NULL(gs->name);
1884 FREE_AND_NULL(gs->path);
1885 FREE_AND_NULL(gs->identifier);
1886 grep_source_clear_data(gs);
1889 void grep_source_clear_data(struct grep_source *gs)
1891 switch (gs->type) {
1892 case GREP_SOURCE_FILE:
1893 case GREP_SOURCE_OID:
1894 /* these types own the buffer */
1895 free((char *)gs->buf);
1896 gs->buf = NULL;
1897 gs->size = 0;
1898 break;
1899 case GREP_SOURCE_BUF:
1900 /* leave user-provided buf intact */
1901 break;
1905 static int grep_source_load_oid(struct grep_source *gs)
1907 enum object_type type;
1909 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1910 &gs->size);
1911 if (!gs->buf)
1912 return error(_("'%s': unable to read %s"),
1913 gs->name,
1914 oid_to_hex(gs->identifier));
1915 return 0;
1918 static int grep_source_load_file(struct grep_source *gs)
1920 const char *filename = gs->identifier;
1921 struct stat st;
1922 char *data;
1923 size_t size;
1924 int i;
1926 if (lstat(filename, &st) < 0) {
1927 err_ret:
1928 if (errno != ENOENT)
1929 error_errno(_("failed to stat '%s'"), filename);
1930 return -1;
1932 if (!S_ISREG(st.st_mode))
1933 return -1;
1934 size = xsize_t(st.st_size);
1935 i = open(filename, O_RDONLY);
1936 if (i < 0)
1937 goto err_ret;
1938 data = xmallocz(size);
1939 if (st.st_size != read_in_full(i, data, size)) {
1940 error_errno(_("'%s': short read"), filename);
1941 close(i);
1942 free(data);
1943 return -1;
1945 close(i);
1947 gs->buf = data;
1948 gs->size = size;
1949 return 0;
1952 static int grep_source_load(struct grep_source *gs)
1954 if (gs->buf)
1955 return 0;
1957 switch (gs->type) {
1958 case GREP_SOURCE_FILE:
1959 return grep_source_load_file(gs);
1960 case GREP_SOURCE_OID:
1961 return grep_source_load_oid(gs);
1962 case GREP_SOURCE_BUF:
1963 return gs->buf ? 0 : -1;
1965 BUG("invalid grep_source type to load");
1968 void grep_source_load_driver(struct grep_source *gs,
1969 struct index_state *istate)
1971 if (gs->driver)
1972 return;
1974 grep_attr_lock();
1975 if (gs->path)
1976 gs->driver = userdiff_find_by_path(istate, gs->path);
1977 if (!gs->driver)
1978 gs->driver = userdiff_find_by_name("default");
1979 grep_attr_unlock();
1982 static int grep_source_is_binary(struct grep_source *gs,
1983 struct index_state *istate)
1985 grep_source_load_driver(gs, istate);
1986 if (gs->driver->binary != -1)
1987 return gs->driver->binary;
1989 if (!grep_source_load(gs))
1990 return buffer_is_binary(gs->buf, gs->size);
1992 return 0;