range-diff: fix a crash in parsing git-log output
[git/raj.git] / range-diff.c
blob5cc920be391d2e93a69899e8e6e4b487fbefa0a3
1 #include "cache.h"
2 #include "range-diff.h"
3 #include "string-list.h"
4 #include "run-command.h"
5 #include "argv-array.h"
6 #include "hashmap.h"
7 #include "xdiff-interface.h"
8 #include "linear-assignment.h"
9 #include "diffcore.h"
10 #include "commit.h"
11 #include "pretty.h"
12 #include "userdiff.h"
13 #include "apply.h"
15 struct patch_util {
16 /* For the search for an exact match */
17 struct hashmap_entry e;
18 const char *diff, *patch;
20 int i, shown;
21 int diffsize;
22 size_t diff_offset;
23 /* the index of the matching item in the other branch, or -1 */
24 int matching;
25 struct object_id oid;
/*
 * Finds the end of the first line within the `size` bytes at `buffer`.
 *
 * If a newline is found it is overwritten with a NUL and the number of
 * bytes up to and including that position is returned. Otherwise the
 * buffer is left untouched and `size` is returned.
 */
static size_t find_end_of_line(char *buffer, unsigned long size)
{
	char *newline = memchr(buffer, '\n', size);

	if (newline) {
		*newline = '\0';
		return newline - buffer + 1;
	}

	return size;
}
40 * Reads the patches into a string list, with the `util` field being populated
41 * as struct object_id (will need to be free()d).
43 static int read_patches(const char *range, struct string_list *list,
44 const struct argv_array *other_arg)
46 struct child_process cp = CHILD_PROCESS_INIT;
47 struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT;
48 struct patch_util *util = NULL;
49 int in_header = 1;
50 char *line, *current_filename = NULL;
51 int offset, len;
52 size_t size;
54 argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
55 "--reverse", "--date-order", "--decorate=no",
56 "--no-prefix",
58 * Choose indicators that are not used anywhere
59 * else in diffs, but still look reasonable
60 * (e.g. will not be confusing when debugging)
62 "--output-indicator-new=>",
63 "--output-indicator-old=<",
64 "--output-indicator-context=#",
65 "--no-abbrev-commit",
66 "--pretty=medium",
67 "--notes",
68 NULL);
69 if (other_arg)
70 argv_array_pushv(&cp.args, other_arg->argv);
71 argv_array_push(&cp.args, range);
72 cp.out = -1;
73 cp.no_stdin = 1;
74 cp.git_cmd = 1;
76 if (start_command(&cp))
77 return error_errno(_("could not start `log`"));
78 if (strbuf_read(&contents, cp.out, 0) < 0) {
79 error_errno(_("could not read `log` output"));
80 finish_command(&cp);
81 return -1;
84 line = contents.buf;
85 size = contents.len;
86 for (offset = 0; size > 0; offset += len, size -= len, line += len) {
87 const char *p;
89 len = find_end_of_line(line, size);
90 line[len - 1] = '\0';
91 if (skip_prefix(line, "commit ", &p)) {
92 if (util) {
93 string_list_append(list, buf.buf)->util = util;
94 strbuf_reset(&buf);
96 util = xcalloc(sizeof(*util), 1);
97 if (get_oid(p, &util->oid)) {
98 error(_("could not parse commit '%s'"), p);
99 free(util);
100 string_list_clear(list, 1);
101 strbuf_release(&buf);
102 strbuf_release(&contents);
103 finish_command(&cp);
104 return -1;
106 util->matching = -1;
107 in_header = 1;
108 continue;
111 if (!util) {
112 error(_("could not parse first line of `log` output: "
113 "did not start with 'commit ': '%s'"),
114 line);
115 string_list_clear(list, 1);
116 strbuf_release(&buf);
117 strbuf_release(&contents);
118 finish_command(&cp);
119 return -1;
122 if (starts_with(line, "diff --git")) {
123 struct patch patch = { 0 };
124 struct strbuf root = STRBUF_INIT;
125 int linenr = 0;
127 in_header = 0;
128 strbuf_addch(&buf, '\n');
129 if (!util->diff_offset)
130 util->diff_offset = buf.len;
131 line[len - 1] = '\n';
132 len = parse_git_diff_header(&root, &linenr, 0, line,
133 len, size, &patch);
134 if (len < 0)
135 die(_("could not parse git header '%.*s'"), (int)len, line);
136 strbuf_addstr(&buf, " ## ");
137 if (patch.is_new > 0)
138 strbuf_addf(&buf, "%s (new)", patch.new_name);
139 else if (patch.is_delete > 0)
140 strbuf_addf(&buf, "%s (deleted)", patch.old_name);
141 else if (patch.is_rename)
142 strbuf_addf(&buf, "%s => %s", patch.old_name, patch.new_name);
143 else
144 strbuf_addstr(&buf, patch.new_name);
146 free(current_filename);
147 if (patch.is_delete > 0)
148 current_filename = xstrdup(patch.old_name);
149 else
150 current_filename = xstrdup(patch.new_name);
152 if (patch.new_mode && patch.old_mode &&
153 patch.old_mode != patch.new_mode)
154 strbuf_addf(&buf, " (mode change %06o => %06o)",
155 patch.old_mode, patch.new_mode);
157 strbuf_addstr(&buf, " ##");
158 } else if (in_header) {
159 if (starts_with(line, "Author: ")) {
160 strbuf_addstr(&buf, " ## Metadata ##\n");
161 strbuf_addstr(&buf, line);
162 strbuf_addstr(&buf, "\n\n");
163 strbuf_addstr(&buf, " ## Commit message ##\n");
164 } else if (starts_with(line, "Notes") &&
165 line[strlen(line) - 1] == ':') {
166 strbuf_addstr(&buf, "\n\n");
167 /* strip the trailing colon */
168 strbuf_addf(&buf, " ## %.*s ##\n",
169 (int)(strlen(line) - 1), line);
170 } else if (starts_with(line, " ")) {
171 p = line + len - 2;
172 while (isspace(*p) && p >= line)
173 p--;
174 strbuf_add(&buf, line, p - line + 1);
175 strbuf_addch(&buf, '\n');
177 continue;
178 } else if (skip_prefix(line, "@@ ", &p)) {
179 p = strstr(p, "@@");
180 strbuf_addstr(&buf, "@@");
181 if (current_filename && p[2])
182 strbuf_addf(&buf, " %s:", current_filename);
183 if (p)
184 strbuf_addstr(&buf, p + 2);
185 } else if (!line[0])
187 * A completely blank (not ' \n', which is context)
188 * line is not valid in a diff. We skip it
189 * silently, because this neatly handles the blank
190 * separator line between commits in git-log
191 * output.
193 continue;
194 else if (line[0] == '>') {
195 strbuf_addch(&buf, '+');
196 strbuf_addstr(&buf, line + 1);
197 } else if (line[0] == '<') {
198 strbuf_addch(&buf, '-');
199 strbuf_addstr(&buf, line + 1);
200 } else if (line[0] == '#') {
201 strbuf_addch(&buf, ' ');
202 strbuf_addstr(&buf, line + 1);
203 } else {
204 strbuf_addch(&buf, ' ');
205 strbuf_addstr(&buf, line);
208 strbuf_addch(&buf, '\n');
209 util->diffsize++;
211 strbuf_release(&contents);
213 if (util)
214 string_list_append(list, buf.buf)->util = util;
215 strbuf_release(&buf);
216 free(current_filename);
218 if (finish_command(&cp))
219 return -1;
221 return 0;
224 static int patch_util_cmp(const void *dummy, const struct patch_util *a,
225 const struct patch_util *b, const char *keydata)
227 return strcmp(a->diff, keydata ? keydata : b->diff);
230 static void find_exact_matches(struct string_list *a, struct string_list *b)
232 struct hashmap map;
233 int i;
235 hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);
237 /* First, add the patches of a to a hash map */
238 for (i = 0; i < a->nr; i++) {
239 struct patch_util *util = a->items[i].util;
241 util->i = i;
242 util->patch = a->items[i].string;
243 util->diff = util->patch + util->diff_offset;
244 hashmap_entry_init(&util->e, strhash(util->diff));
245 hashmap_add(&map, &util->e);
248 /* Now try to find exact matches in b */
249 for (i = 0; i < b->nr; i++) {
250 struct patch_util *util = b->items[i].util, *other;
252 util->i = i;
253 util->patch = b->items[i].string;
254 util->diff = util->patch + util->diff_offset;
255 hashmap_entry_init(&util->e, strhash(util->diff));
256 other = hashmap_remove_entry(&map, util, e, NULL);
257 if (other) {
258 if (other->matching >= 0)
259 BUG("already assigned!");
261 other->matching = i;
262 util->matching = other->i;
266 hashmap_free(&map);
/*
 * Line callback used for measuring a diff: `data` points to an int counter
 * which is incremented by one; the line itself is ignored.
 */
static void diffsize_consume(void *data, char *line, unsigned long len)
{
	int *counter = data;

	*counter += 1;
}
/*
 * Hunk callback used for measuring a diff: a hunk header adds one to the
 * int counter `data` points to, exactly like a consumed line does. All
 * other arguments are ignored.
 */
static void diffsize_hunk(void *data, long ob, long on, long nb, long nn,
			  const char *funcline, long funclen)
{
	int *counter = data;

	(*counter)++;
}
280 static int diffsize(const char *a, const char *b)
282 xpparam_t pp = { 0 };
283 xdemitconf_t cfg = { 0 };
284 mmfile_t mf1, mf2;
285 int count = 0;
287 mf1.ptr = (char *)a;
288 mf1.size = strlen(a);
289 mf2.ptr = (char *)b;
290 mf2.size = strlen(b);
292 cfg.ctxlen = 3;
293 if (!xdi_diff_outf(&mf1, &mf2,
294 diffsize_hunk, diffsize_consume, &count,
295 &pp, &cfg))
296 return count;
298 error(_("failed to generate diff"));
299 return COST_MAX;
302 static void get_correspondences(struct string_list *a, struct string_list *b,
303 int creation_factor)
305 int n = a->nr + b->nr;
306 int *cost, c, *a2b, *b2a;
307 int i, j;
309 ALLOC_ARRAY(cost, st_mult(n, n));
310 ALLOC_ARRAY(a2b, n);
311 ALLOC_ARRAY(b2a, n);
313 for (i = 0; i < a->nr; i++) {
314 struct patch_util *a_util = a->items[i].util;
316 for (j = 0; j < b->nr; j++) {
317 struct patch_util *b_util = b->items[j].util;
319 if (a_util->matching == j)
320 c = 0;
321 else if (a_util->matching < 0 && b_util->matching < 0)
322 c = diffsize(a_util->diff, b_util->diff);
323 else
324 c = COST_MAX;
325 cost[i + n * j] = c;
328 c = a_util->matching < 0 ?
329 a_util->diffsize * creation_factor / 100 : COST_MAX;
330 for (j = b->nr; j < n; j++)
331 cost[i + n * j] = c;
334 for (j = 0; j < b->nr; j++) {
335 struct patch_util *util = b->items[j].util;
337 c = util->matching < 0 ?
338 util->diffsize * creation_factor / 100 : COST_MAX;
339 for (i = a->nr; i < n; i++)
340 cost[i + n * j] = c;
343 for (i = a->nr; i < n; i++)
344 for (j = b->nr; j < n; j++)
345 cost[i + n * j] = 0;
347 compute_assignment(n, n, cost, a2b, b2a);
349 for (i = 0; i < a->nr; i++)
350 if (a2b[i] >= 0 && a2b[i] < b->nr) {
351 struct patch_util *a_util = a->items[i].util;
352 struct patch_util *b_util = b->items[a2b[i]].util;
354 a_util->matching = a2b[i];
355 b_util->matching = i;
358 free(cost);
359 free(a2b);
360 free(b2a);
363 static void output_pair_header(struct diff_options *diffopt,
364 int patch_no_width,
365 struct strbuf *buf,
366 struct strbuf *dashes,
367 struct patch_util *a_util,
368 struct patch_util *b_util)
370 struct object_id *oid = a_util ? &a_util->oid : &b_util->oid;
371 struct commit *commit;
372 char status;
373 const char *color_reset = diff_get_color_opt(diffopt, DIFF_RESET);
374 const char *color_old = diff_get_color_opt(diffopt, DIFF_FILE_OLD);
375 const char *color_new = diff_get_color_opt(diffopt, DIFF_FILE_NEW);
376 const char *color_commit = diff_get_color_opt(diffopt, DIFF_COMMIT);
377 const char *color;
379 if (!dashes->len)
380 strbuf_addchars(dashes, '-',
381 strlen(find_unique_abbrev(oid,
382 DEFAULT_ABBREV)));
384 if (!b_util) {
385 color = color_old;
386 status = '<';
387 } else if (!a_util) {
388 color = color_new;
389 status = '>';
390 } else if (strcmp(a_util->patch, b_util->patch)) {
391 color = color_commit;
392 status = '!';
393 } else {
394 color = color_commit;
395 status = '=';
398 strbuf_reset(buf);
399 strbuf_addstr(buf, status == '!' ? color_old : color);
400 if (!a_util)
401 strbuf_addf(buf, "%*s: %s ", patch_no_width, "-", dashes->buf);
402 else
403 strbuf_addf(buf, "%*d: %s ", patch_no_width, a_util->i + 1,
404 find_unique_abbrev(&a_util->oid, DEFAULT_ABBREV));
406 if (status == '!')
407 strbuf_addf(buf, "%s%s", color_reset, color);
408 strbuf_addch(buf, status);
409 if (status == '!')
410 strbuf_addf(buf, "%s%s", color_reset, color_new);
412 if (!b_util)
413 strbuf_addf(buf, " %*s: %s", patch_no_width, "-", dashes->buf);
414 else
415 strbuf_addf(buf, " %*d: %s", patch_no_width, b_util->i + 1,
416 find_unique_abbrev(&b_util->oid, DEFAULT_ABBREV));
418 commit = lookup_commit_reference(the_repository, oid);
419 if (commit) {
420 if (status == '!')
421 strbuf_addf(buf, "%s%s", color_reset, color);
423 strbuf_addch(buf, ' ');
424 pp_commit_easy(CMIT_FMT_ONELINE, commit, buf);
426 strbuf_addf(buf, "%s\n", color_reset);
428 fwrite(buf->buf, buf->len, 1, diffopt->file);
431 static struct userdiff_driver section_headers = {
432 .funcname = { "^ ## (.*) ##$\n"
433 "^.?@@ (.*)$", REG_EXTENDED }
436 static struct diff_filespec *get_filespec(const char *name, const char *p)
438 struct diff_filespec *spec = alloc_filespec(name);
440 fill_filespec(spec, &null_oid, 0, 0100644);
441 spec->data = (char *)p;
442 spec->size = strlen(p);
443 spec->should_munmap = 0;
444 spec->is_stdin = 1;
445 spec->driver = &section_headers;
447 return spec;
450 static void patch_diff(const char *a, const char *b,
451 struct diff_options *diffopt)
453 diff_queue(&diff_queued_diff,
454 get_filespec("a", a), get_filespec("b", b));
456 diffcore_std(diffopt);
457 diff_flush(diffopt);
460 static void output(struct string_list *a, struct string_list *b,
461 struct diff_options *diffopt)
463 struct strbuf buf = STRBUF_INIT, dashes = STRBUF_INIT;
464 int patch_no_width = decimal_width(1 + (a->nr > b->nr ? a->nr : b->nr));
465 int i = 0, j = 0;
468 * We assume the user is really more interested in the second argument
469 * ("newer" version). To that end, we print the output in the order of
470 * the RHS (the `b` parameter). To put the LHS (the `a` parameter)
471 * commits that are no longer in the RHS into a good place, we place
472 * them once we have shown all of their predecessors in the LHS.
475 while (i < a->nr || j < b->nr) {
476 struct patch_util *a_util, *b_util;
477 a_util = i < a->nr ? a->items[i].util : NULL;
478 b_util = j < b->nr ? b->items[j].util : NULL;
480 /* Skip all the already-shown commits from the LHS. */
481 while (i < a->nr && a_util->shown)
482 a_util = ++i < a->nr ? a->items[i].util : NULL;
484 /* Show unmatched LHS commit whose predecessors were shown. */
485 if (i < a->nr && a_util->matching < 0) {
486 output_pair_header(diffopt, patch_no_width,
487 &buf, &dashes, a_util, NULL);
488 i++;
489 continue;
492 /* Show unmatched RHS commits. */
493 while (j < b->nr && b_util->matching < 0) {
494 output_pair_header(diffopt, patch_no_width,
495 &buf, &dashes, NULL, b_util);
496 b_util = ++j < b->nr ? b->items[j].util : NULL;
499 /* Show matching LHS/RHS pair. */
500 if (j < b->nr) {
501 a_util = a->items[b_util->matching].util;
502 output_pair_header(diffopt, patch_no_width,
503 &buf, &dashes, a_util, b_util);
504 if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
505 patch_diff(a->items[b_util->matching].string,
506 b->items[j].string, diffopt);
507 a_util->shown = 1;
508 j++;
511 strbuf_release(&buf);
512 strbuf_release(&dashes);
/*
 * Output-prefix callback: the callback data already is the strbuf holding
 * the prefix, so it is handed back unchanged. `opt` is not used.
 */
static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data)
{
	struct strbuf *prefix = data;

	return prefix;
}
520 int show_range_diff(const char *range1, const char *range2,
521 int creation_factor, int dual_color,
522 const struct diff_options *diffopt,
523 const struct argv_array *other_arg)
525 int res = 0;
527 struct string_list branch1 = STRING_LIST_INIT_DUP;
528 struct string_list branch2 = STRING_LIST_INIT_DUP;
530 if (read_patches(range1, &branch1, other_arg))
531 res = error(_("could not parse log for '%s'"), range1);
532 if (!res && read_patches(range2, &branch2, other_arg))
533 res = error(_("could not parse log for '%s'"), range2);
535 if (!res) {
536 struct diff_options opts;
537 struct strbuf indent = STRBUF_INIT;
539 if (diffopt)
540 memcpy(&opts, diffopt, sizeof(opts));
541 else
542 diff_setup(&opts);
544 if (!opts.output_format)
545 opts.output_format = DIFF_FORMAT_PATCH;
546 opts.flags.suppress_diff_headers = 1;
547 opts.flags.dual_color_diffed_diffs = dual_color;
548 opts.flags.suppress_hunk_header_line_count = 1;
549 opts.output_prefix = output_prefix_cb;
550 strbuf_addstr(&indent, " ");
551 opts.output_prefix_data = &indent;
552 diff_setup_done(&opts);
554 find_exact_matches(&branch1, &branch2);
555 get_correspondences(&branch1, &branch2, creation_factor);
556 output(&branch1, &branch2, &opts);
558 strbuf_release(&indent);
561 string_list_clear(&branch1, 1);
562 string_list_clear(&branch2, 1);
564 return res;