Better string search algorithm for log -S
[git/log-S.git] / diffcore-pickaxe.c
blob78ac1c098238780d04715eb7093cda8140942446
1 // -*- c-basic-offset: 8; indent-tabs-mode: t -*-
2 /*
3 * Copyright (C) 2005 Junio C Hamano
4 * Copyright (C) 2010 Google Inc.
5 */
6 #include "cache.h"
7 #include "diff.h"
8 #include "diffcore.h"
9 #include "xdiff-interface.h"
11 struct diffgrep_cb {
12 regex_t *regexp;
13 int hit;
16 static void diffgrep_consume(void *priv, char *line, unsigned long len)
18 struct diffgrep_cb *data = priv;
19 regmatch_t regmatch;
20 int hold;
22 if (line[0] != '+' && line[0] != '-')
23 return;
24 if (data->hit)
26 * NEEDSWORK: we should have a way to terminate the
27 * caller early.
29 return;
30 /* Yuck -- line ought to be "const char *"! */
31 hold = line[len];
32 line[len] = '\0';
33 data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
34 line[len] = hold;
37 static void fill_one(struct diff_filespec *one,
38 mmfile_t *mf, struct userdiff_driver **textconv)
40 if (DIFF_FILE_VALID(one)) {
41 *textconv = get_textconv(one);
42 mf->size = fill_textconv(*textconv, one, &mf->ptr);
43 } else {
44 memset(mf, 0, sizeof(*mf));
48 static int diff_grep(struct diff_filepair *p, regex_t *regexp, struct diff_options *o)
50 regmatch_t regmatch;
51 struct userdiff_driver *textconv_one = NULL;
52 struct userdiff_driver *textconv_two = NULL;
53 mmfile_t mf1, mf2;
54 int hit;
56 if (diff_unmodified_pair(p))
57 return 0;
59 fill_one(p->one, &mf1, &textconv_one);
60 fill_one(p->two, &mf2, &textconv_two);
62 if (!mf1.ptr) {
63 if (!mf2.ptr)
64 return 0; /* ignore unmerged */
65 /* created "two" -- does it have what we are looking for? */
66 hit = !regexec(regexp, p->two->data, 1, &regmatch, 0);
67 } else if (!mf2.ptr) {
68 /* removed "one" -- did it have what we are looking for? */
69 hit = !regexec(regexp, p->one->data, 1, &regmatch, 0);
70 } else {
72 * We have both sides; need to run textual diff and see if
73 * the pattern appears on added/deleted lines.
75 struct diffgrep_cb ecbdata;
76 xpparam_t xpp;
77 xdemitconf_t xecfg;
79 memset(&xpp, 0, sizeof(xpp));
80 memset(&xecfg, 0, sizeof(xecfg));
81 ecbdata.regexp = regexp;
82 ecbdata.hit = 0;
83 xecfg.ctxlen = o->context;
84 xecfg.interhunkctxlen = o->interhunkcontext;
85 xdi_diff_outf(&mf1, &mf2, diffgrep_consume, &ecbdata,
86 &xpp, &xecfg);
87 hit = ecbdata.hit;
89 if (textconv_one)
90 free(mf1.ptr);
91 if (textconv_two)
92 free(mf2.ptr);
93 return hit;
96 static void diffcore_pickaxe_grep(struct diff_options *o)
98 struct diff_queue_struct *q = &diff_queued_diff;
99 int i, has_changes, err;
100 regex_t regex;
101 struct diff_queue_struct outq;
102 outq.queue = NULL;
103 outq.nr = outq.alloc = 0;
105 err = regcomp(&regex, o->pickaxe, REG_EXTENDED | REG_NEWLINE);
106 if (err) {
107 char errbuf[1024];
108 regerror(err, &regex, errbuf, 1024);
109 regfree(&regex);
110 die("invalid log-grep regex: %s", errbuf);
113 if (o->pickaxe_opts & DIFF_PICKAXE_ALL) {
114 /* Showing the whole changeset if needle exists */
115 for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
116 struct diff_filepair *p = q->queue[i];
117 if (diff_grep(p, &regex, o))
118 has_changes++;
120 if (has_changes)
121 return; /* do not munge the queue */
124 * Otherwise we will clear the whole queue by copying
125 * the empty outq at the end of this function, but
126 * first clear the current entries in the queue.
128 for (i = 0; i < q->nr; i++)
129 diff_free_filepair(q->queue[i]);
130 } else {
131 /* Showing only the filepairs that has the needle */
132 for (i = 0; i < q->nr; i++) {
133 struct diff_filepair *p = q->queue[i];
134 if (diff_grep(p, &regex, o))
135 diff_q(&outq, p);
136 else
137 diff_free_filepair(p);
141 regfree(&regex);
143 free(q->queue);
144 *q = outq;
145 return;
148 struct needle {
149 unsigned skip[256];
150 unsigned long len;
151 int use_regex;
152 regex_t regex;
153 unsigned char text[];
156 static void bmh_init(struct needle *needle)
158 int i;
159 for (i = 0; i < 256; i++)
160 needle->skip[i] = needle->len;
162 // Last character in needle can not be a mismatch in last position */
163 for (i = 0; i < needle->len - 1; i++)
164 needle->skip[needle->text[i]] = needle->len - i - 1;
168 static unsigned bmh(const unsigned char *haystack, unsigned long hlen,
169 const struct needle *needle)
171 int i;
173 if (hlen < needle->len)
174 return 0;
176 const unsigned char *end = haystack + hlen - needle->len;
177 unsigned cnt = 0;
178 while (haystack <= end) {
179 while (haystack[needle->len - 1] != needle->text[needle->len - 1]) {
180 haystack += needle->skip[haystack[needle->len - 1]];
181 if (haystack > end)
182 return cnt;
184 for (i = 0; i < needle->len - 1; i++)
185 if (haystack[i] != needle->text[i])
186 goto mismatch;
187 cnt++;
188 haystack += needle->len;
189 continue;
190 mismatch:
191 haystack += needle->skip[haystack[needle->len - 1]];
194 return cnt;
198 static unsigned int contains(struct diff_filespec *one,
199 struct needle *needle)
201 unsigned int cnt;
202 unsigned long sz;
203 const char *data;
204 if (diff_populate_filespec(one, 0))
205 return 0;
206 if (!needle->len)
207 return 0;
209 sz = one->size;
210 data = one->data;
211 cnt = 0;
213 if (needle->use_regex) {
214 regmatch_t regmatch;
215 int flags = 0;
217 assert(data[sz] == '\0');
218 while (*data && !regexec(&needle->regex, data, 1, &regmatch, flags)) {
219 flags |= REG_NOTBOL;
220 data += regmatch.rm_eo;
221 if (*data && regmatch.rm_so == regmatch.rm_eo)
222 data++;
223 cnt++;
226 } else
227 cnt = bmh(data, sz, needle);
228 diff_free_filespec_data(one);
229 return cnt;
232 static void diffcore_pickaxe_count(struct diff_options *o)
234 const char *needle_txt = o->pickaxe;
235 int opts = o->pickaxe_opts;
236 struct diff_queue_struct *q = &diff_queued_diff;
237 unsigned long len = strlen(needle_txt);
238 int i, has_changes;
239 struct diff_queue_struct outq;
240 DIFF_QUEUE_CLEAR(&outq);
241 struct needle *needle = NULL;
243 if (opts & DIFF_PICKAXE_REGEX) {
244 needle = xcalloc(1, sizeof(*needle));
245 needle->use_regex = 1;
246 needle->len = len;
247 int err;
248 err = regcomp(&needle->regex, needle_txt, REG_EXTENDED | REG_NEWLINE);
249 if (err) {
250 /* The POSIX.2 people are surely sick */
251 char errbuf[1024];
252 regerror(err, &needle->regex, errbuf, 1024);
253 regfree(&needle->regex);
254 die("invalid pickaxe regex: %s", errbuf);
257 else {
258 needle = xcalloc(1, sizeof(*needle) + len);
259 memcpy(needle->text, needle_txt, len);
260 needle->len = len;
261 bmh_init(needle);
264 if (opts & DIFF_PICKAXE_ALL) {
265 /* Showing the whole changeset if needle exists */
266 for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
267 struct diff_filepair *p = q->queue[i];
268 if (!DIFF_FILE_VALID(p->one)) {
269 if (!DIFF_FILE_VALID(p->two))
270 continue; /* ignore unmerged */
271 /* created */
272 if (contains(p->two, needle))
273 has_changes++;
275 else if (!DIFF_FILE_VALID(p->two)) {
276 if (contains(p->one, needle))
277 has_changes++;
279 else if (!diff_unmodified_pair(p) &&
280 contains(p->one, needle) !=
281 contains(p->two, needle))
282 has_changes++;
284 if (has_changes)
285 return; /* not munge the queue */
287 /* otherwise we will clear the whole queue
288 * by copying the empty outq at the end of this
289 * function, but first clear the current entries
290 * in the queue.
292 for (i = 0; i < q->nr; i++)
293 diff_free_filepair(q->queue[i]);
295 else
296 /* Showing only the filepairs that has the needle */
297 for (i = 0; i < q->nr; i++) {
298 struct diff_filepair *p = q->queue[i];
299 has_changes = 0;
300 if (!DIFF_FILE_VALID(p->one)) {
301 if (!DIFF_FILE_VALID(p->two))
302 ; /* ignore unmerged */
303 /* created */
304 else if (contains(p->two, needle))
305 has_changes = 1;
307 else if (!DIFF_FILE_VALID(p->two)) {
308 if (contains(p->one, needle))
309 has_changes = 1;
311 else if (!diff_unmodified_pair(p) &&
312 contains(p->one, needle) !=
313 contains(p->two, needle))
314 has_changes = 1;
316 if (has_changes)
317 diff_q(&outq, p);
318 else
319 diff_free_filepair(p);
322 if (opts & DIFF_PICKAXE_REGEX)
323 regfree(&regex);
325 free(q->queue);
326 *q = outq;
327 return;
330 void diffcore_pickaxe(struct diff_options *o)
332 /* Might want to warn when both S and G are on; I don't care... */
333 if (o->pickaxe_opts & DIFF_PICKAXE_KIND_G)
334 diffcore_pickaxe_grep(o);
335 else
336 diffcore_pickaxe_count(o);