add a howto document about corrupted blob recovery
[git/repo.git] / pretty.c
blob490cede263020dc15f4d48af06faf8d77552883f
1 #include "cache.h"
2 #include "commit.h"
3 #include "interpolate.h"
4 #include "utf8.h"
5 #include "diff.h"
6 #include "revision.h"
8 static struct cmt_fmt_map {
9 const char *n;
10 size_t cmp_len;
11 enum cmit_fmt v;
12 } cmt_fmts[] = {
13 { "raw", 1, CMIT_FMT_RAW },
14 { "medium", 1, CMIT_FMT_MEDIUM },
15 { "short", 1, CMIT_FMT_SHORT },
16 { "email", 1, CMIT_FMT_EMAIL },
17 { "full", 5, CMIT_FMT_FULL },
18 { "fuller", 5, CMIT_FMT_FULLER },
19 { "oneline", 1, CMIT_FMT_ONELINE },
20 { "format:", 7, CMIT_FMT_USERFORMAT},
23 static char *user_format;
25 enum cmit_fmt get_commit_format(const char *arg)
27 int i;
29 if (!arg || !*arg)
30 return CMIT_FMT_DEFAULT;
31 if (*arg == '=')
32 arg++;
33 if (!prefixcmp(arg, "format:")) {
34 if (user_format)
35 free(user_format);
36 user_format = xstrdup(arg + 7);
37 return CMIT_FMT_USERFORMAT;
39 for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
40 if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
41 !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
42 return cmt_fmts[i].v;
45 die("invalid --pretty format: %s", arg);
/*
 * Generic support for pretty-printing the header
 */

/*
 * Return the number of bytes in the first line of msg, counting the
 * terminating newline when there is one.  Returns 0 only when msg is
 * the empty string.
 */
static int get_one_line(const char *msg)
{
	const char *p = msg;

	while (*p) {
		/* the newline itself is part of the line */
		if (*p++ == '\n')
			break;
	}
	return p - msg;
}
/*
 * High bit set, or ISO-2022-INT.
 *
 * Only the low eight bits of ch are examined.  Returns 1 when bit 7 is
 * set or the byte is ESC (0x1b), 0 otherwise.
 */
int non_ascii(int ch)
{
	int byte = ch & 0xff;

	if (byte & 0x80)
		return 1;
	return byte == 0x1b;
}
/*
 * Return 1 when ch has to be escaped inside the quoted text written by
 * add_rfc2047(): '=', '?', '_', or anything non_ascii() flags.
 */
static int is_rfc2047_special(char ch)
{
	switch (ch) {
	case '=':
	case '?':
	case '_':
		return 1;
	default:
		return non_ascii(ch);
	}
}
/*
 * Append the first len bytes of line to sb.
 *
 * The text is copied verbatim unless it contains a byte flagged by
 * non_ascii() or the sequence "=?".  In that case it is written as
 * "=?<encoding>?q?...?=", with every special byte and every space
 * replaced by "=XX" (two upper-case hex digits).
 */
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
		       const char *encoding)
{
	int pos, flushed;
	int must_quote = 0;

	for (pos = 0; pos < len && !must_quote; pos++) {
		int ch = line[pos];

		if (non_ascii(ch))
			must_quote = 1;
		else if ((pos + 1 < len) && ch == '=' && line[pos + 1] == '?')
			must_quote = 1;
	}
	if (!must_quote) {
		strbuf_add(sb, line, len);
		return;
	}

	/* worst case every byte becomes three, plus the wrapper */
	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
	strbuf_addf(sb, "=?%s?q?", encoding);
	for (pos = flushed = 0; pos < len; pos++) {
		unsigned ch = line[pos] & 0xFF;

		/*
		 * We encode ' ' using '=20' even though rfc2047
		 * allows using '_' for readability.  Unfortunately,
		 * many programs do not understand this and just
		 * leave the underscore in place.
		 */
		if (!is_rfc2047_special(ch) && ch != ' ')
			continue;

		/* copy the pending run of plain bytes, then the escape */
		strbuf_add(sb, line + flushed, pos - flushed);
		strbuf_addf(sb, "=%02X", ch);
		flushed = pos + 1;
	}
	strbuf_add(sb, line + flushed, len - flushed);
	strbuf_addstr(sb, "?=");
}
114 static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
115 const char *line, enum date_mode dmode,
116 const char *encoding)
118 char *date;
119 int namelen;
120 unsigned long time;
121 int tz;
122 const char *filler = " ";
124 if (fmt == CMIT_FMT_ONELINE)
125 return;
126 date = strchr(line, '>');
127 if (!date)
128 return;
129 namelen = ++date - line;
130 time = strtoul(date, &date, 10);
131 tz = strtol(date, NULL, 10);
133 if (fmt == CMIT_FMT_EMAIL) {
134 char *name_tail = strchr(line, '<');
135 int display_name_length;
136 if (!name_tail)
137 return;
138 while (line < name_tail && isspace(name_tail[-1]))
139 name_tail--;
140 display_name_length = name_tail - line;
141 filler = "";
142 strbuf_addstr(sb, "From: ");
143 add_rfc2047(sb, line, display_name_length, encoding);
144 strbuf_add(sb, name_tail, namelen - display_name_length);
145 strbuf_addch(sb, '\n');
146 } else {
147 strbuf_addf(sb, "%s: %.*s%.*s\n", what,
148 (fmt == CMIT_FMT_FULLER) ? 4 : 0,
149 filler, namelen, line);
151 switch (fmt) {
152 case CMIT_FMT_MEDIUM:
153 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, dmode));
154 break;
155 case CMIT_FMT_EMAIL:
156 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
157 break;
158 case CMIT_FMT_FULLER:
159 strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
160 break;
161 default:
162 /* notin' */
163 break;
/*
 * Strip trailing whitespace from a line of *len_p bytes by shrinking
 * *len_p (the text itself is not modified).  Returns 1 when nothing but
 * whitespace was there, i.e. the trimmed length is 0; otherwise 0.
 */
static int is_empty_line(const char *line, int *len_p)
{
	int end;

	for (end = *len_p; end; end--) {
		if (!isspace(line[end - 1]))
			break;
	}
	*len_p = end;
	return end == 0;
}
176 static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
177 const struct commit *commit, int abbrev)
179 struct commit_list *parent = commit->parents;
181 if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
182 !parent || !parent->next)
183 return;
185 strbuf_addstr(sb, "Merge:");
187 while (parent) {
188 struct commit *p = parent->item;
189 const char *hex = NULL;
190 const char *dots;
191 if (abbrev)
192 hex = find_unique_abbrev(p->object.sha1, abbrev);
193 if (!hex)
194 hex = sha1_to_hex(p->object.sha1);
195 dots = (abbrev && strlen(hex) != 40) ? "..." : "";
196 parent = parent->next;
198 strbuf_addf(sb, " %s%s", hex, dots);
200 strbuf_addch(sb, '\n');
203 static char *get_header(const struct commit *commit, const char *key)
205 int key_len = strlen(key);
206 const char *line = commit->buffer;
208 for (;;) {
209 const char *eol = strchr(line, '\n'), *next;
211 if (line == eol)
212 return NULL;
213 if (!eol) {
214 eol = line + strlen(line);
215 next = NULL;
216 } else
217 next = eol + 1;
218 if (eol - line > key_len &&
219 !strncmp(line, key, key_len) &&
220 line[key_len] == ' ') {
221 return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
223 line = next;
227 static char *replace_encoding_header(char *buf, const char *encoding)
229 struct strbuf tmp;
230 size_t start, len;
231 char *cp = buf;
233 /* guess if there is an encoding header before a \n\n */
234 while (strncmp(cp, "encoding ", strlen("encoding "))) {
235 cp = strchr(cp, '\n');
236 if (!cp || *++cp == '\n')
237 return buf;
239 start = cp - buf;
240 cp = strchr(cp, '\n');
241 if (!cp)
242 return buf; /* should not happen but be defensive */
243 len = cp + 1 - (buf + start);
245 strbuf_init(&tmp, 0);
246 strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
247 if (is_encoding_utf8(encoding)) {
248 /* we have re-coded to UTF-8; drop the header */
249 strbuf_remove(&tmp, start, len);
250 } else {
251 /* just replaces XXXX in 'encoding XXXX\n' */
252 strbuf_splice(&tmp, start + strlen("encoding "),
253 len - strlen("encoding \n"),
254 encoding, strlen(encoding));
256 return strbuf_detach(&tmp, NULL);
259 static char *logmsg_reencode(const struct commit *commit,
260 const char *output_encoding)
262 static const char *utf8 = "utf-8";
263 const char *use_encoding;
264 char *encoding;
265 char *out;
267 if (!*output_encoding)
268 return NULL;
269 encoding = get_header(commit, "encoding");
270 use_encoding = encoding ? encoding : utf8;
271 if (!strcmp(use_encoding, output_encoding))
272 if (encoding) /* we'll strip encoding header later */
273 out = xstrdup(commit->buffer);
274 else
275 return NULL; /* nothing to do */
276 else
277 out = reencode_string(commit->buffer,
278 output_encoding, use_encoding);
279 if (out)
280 out = replace_encoding_header(out, output_encoding);
282 free(encoding);
283 return out;
286 static void fill_person(struct interp *table, const char *msg, int len)
288 int start, end, tz = 0;
289 unsigned long date;
290 char *ep;
292 /* parse name */
293 for (end = 0; end < len && msg[end] != '<'; end++)
294 ; /* do nothing */
295 start = end + 1;
296 while (end > 0 && isspace(msg[end - 1]))
297 end--;
298 table[0].value = xmemdupz(msg, end);
300 if (start >= len)
301 return;
303 /* parse email */
304 for (end = start + 1; end < len && msg[end] != '>'; end++)
305 ; /* do nothing */
307 if (end >= len)
308 return;
310 table[1].value = xmemdupz(msg + start, end - start);
312 /* parse date */
313 for (start = end + 1; start < len && isspace(msg[start]); start++)
314 ; /* do nothing */
315 if (start >= len)
316 return;
317 date = strtoul(msg + start, &ep, 10);
318 if (msg + start == ep)
319 return;
321 table[5].value = xmemdupz(msg + start, ep - (msg + start));
323 /* parse tz */
324 for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
325 ; /* do nothing */
326 if (start + 1 < len) {
327 tz = strtoul(msg + start + 1, NULL, 10);
328 if (msg[start] == '-')
329 tz = -tz;
332 interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
333 interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
334 interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
335 interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
338 void format_commit_message(const struct commit *commit,
339 const void *format, struct strbuf *sb)
341 struct interp table[] = {
342 { "%H" }, /* commit hash */
343 { "%h" }, /* abbreviated commit hash */
344 { "%T" }, /* tree hash */
345 { "%t" }, /* abbreviated tree hash */
346 { "%P" }, /* parent hashes */
347 { "%p" }, /* abbreviated parent hashes */
348 { "%an" }, /* author name */
349 { "%ae" }, /* author email */
350 { "%ad" }, /* author date */
351 { "%aD" }, /* author date, RFC2822 style */
352 { "%ar" }, /* author date, relative */
353 { "%at" }, /* author date, UNIX timestamp */
354 { "%ai" }, /* author date, ISO 8601 */
355 { "%cn" }, /* committer name */
356 { "%ce" }, /* committer email */
357 { "%cd" }, /* committer date */
358 { "%cD" }, /* committer date, RFC2822 style */
359 { "%cr" }, /* committer date, relative */
360 { "%ct" }, /* committer date, UNIX timestamp */
361 { "%ci" }, /* committer date, ISO 8601 */
362 { "%e" }, /* encoding */
363 { "%s" }, /* subject */
364 { "%b" }, /* body */
365 { "%Cred" }, /* red */
366 { "%Cgreen" }, /* green */
367 { "%Cblue" }, /* blue */
368 { "%Creset" }, /* reset color */
369 { "%n" }, /* newline */
370 { "%m" }, /* left/right/bottom */
372 enum interp_index {
373 IHASH = 0, IHASH_ABBREV,
374 ITREE, ITREE_ABBREV,
375 IPARENTS, IPARENTS_ABBREV,
376 IAUTHOR_NAME, IAUTHOR_EMAIL,
377 IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
378 IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
379 ICOMMITTER_NAME, ICOMMITTER_EMAIL,
380 ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
381 ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
382 ICOMMITTER_ISO8601,
383 IENCODING,
384 ISUBJECT,
385 IBODY,
386 IRED, IGREEN, IBLUE, IRESET_COLOR,
387 INEWLINE,
388 ILEFT_RIGHT,
390 struct commit_list *p;
391 char parents[1024];
392 unsigned long len;
393 int i;
394 enum { HEADER, SUBJECT, BODY } state;
395 const char *msg = commit->buffer;
397 if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
398 die("invalid interp table!");
400 /* these are independent of the commit */
401 interp_set_entry(table, IRED, "\033[31m");
402 interp_set_entry(table, IGREEN, "\033[32m");
403 interp_set_entry(table, IBLUE, "\033[34m");
404 interp_set_entry(table, IRESET_COLOR, "\033[m");
405 interp_set_entry(table, INEWLINE, "\n");
407 /* these depend on the commit */
408 if (!commit->object.parsed)
409 parse_object(commit->object.sha1);
410 interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
411 interp_set_entry(table, IHASH_ABBREV,
412 find_unique_abbrev(commit->object.sha1,
413 DEFAULT_ABBREV));
414 interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
415 interp_set_entry(table, ITREE_ABBREV,
416 find_unique_abbrev(commit->tree->object.sha1,
417 DEFAULT_ABBREV));
418 interp_set_entry(table, ILEFT_RIGHT,
419 (commit->object.flags & BOUNDARY)
420 ? "-"
421 : (commit->object.flags & SYMMETRIC_LEFT)
422 ? "<"
423 : ">");
425 parents[1] = 0;
426 for (i = 0, p = commit->parents;
427 p && i < sizeof(parents) - 1;
428 p = p->next)
429 i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
430 sha1_to_hex(p->item->object.sha1));
431 interp_set_entry(table, IPARENTS, parents + 1);
433 parents[1] = 0;
434 for (i = 0, p = commit->parents;
435 p && i < sizeof(parents) - 1;
436 p = p->next)
437 i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
438 find_unique_abbrev(p->item->object.sha1,
439 DEFAULT_ABBREV));
440 interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
442 for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
443 int eol;
444 for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
445 ; /* do nothing */
447 if (state == SUBJECT) {
448 table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
449 i = eol;
451 if (i == eol) {
452 state++;
453 /* strip empty lines */
454 while (msg[eol + 1] == '\n')
455 eol++;
456 } else if (!prefixcmp(msg + i, "author "))
457 fill_person(table + IAUTHOR_NAME,
458 msg + i + 7, eol - i - 7);
459 else if (!prefixcmp(msg + i, "committer "))
460 fill_person(table + ICOMMITTER_NAME,
461 msg + i + 10, eol - i - 10);
462 else if (!prefixcmp(msg + i, "encoding "))
463 table[IENCODING].value =
464 xmemdupz(msg + i + 9, eol - i - 9);
465 i = eol;
467 if (msg[i])
468 table[IBODY].value = xstrdup(msg + i);
470 len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
471 format, table, ARRAY_SIZE(table));
472 if (len > strbuf_avail(sb)) {
473 strbuf_grow(sb, len);
474 interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
475 format, table, ARRAY_SIZE(table));
477 strbuf_setlen(sb, sb->len + len);
478 interp_clear_table(table, ARRAY_SIZE(table));
481 static void pp_header(enum cmit_fmt fmt,
482 int abbrev,
483 enum date_mode dmode,
484 const char *encoding,
485 const struct commit *commit,
486 const char **msg_p,
487 struct strbuf *sb)
489 int parents_shown = 0;
491 for (;;) {
492 const char *line = *msg_p;
493 int linelen = get_one_line(*msg_p);
495 if (!linelen)
496 return;
497 *msg_p += linelen;
499 if (linelen == 1)
500 /* End of header */
501 return;
503 if (fmt == CMIT_FMT_RAW) {
504 strbuf_add(sb, line, linelen);
505 continue;
508 if (!memcmp(line, "parent ", 7)) {
509 if (linelen != 48)
510 die("bad parent line in commit");
511 continue;
514 if (!parents_shown) {
515 struct commit_list *parent;
516 int num;
517 for (parent = commit->parents, num = 0;
518 parent;
519 parent = parent->next, num++)
521 /* with enough slop */
522 strbuf_grow(sb, num * 50 + 20);
523 add_merge_info(fmt, sb, commit, abbrev);
524 parents_shown = 1;
528 * MEDIUM == DEFAULT shows only author with dates.
529 * FULL shows both authors but not dates.
530 * FULLER shows both authors and dates.
532 if (!memcmp(line, "author ", 7)) {
533 strbuf_grow(sb, linelen + 80);
534 add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
536 if (!memcmp(line, "committer ", 10) &&
537 (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
538 strbuf_grow(sb, linelen + 80);
539 add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
544 static void pp_title_line(enum cmit_fmt fmt,
545 const char **msg_p,
546 struct strbuf *sb,
547 const char *subject,
548 const char *after_subject,
549 const char *encoding,
550 int plain_non_ascii)
552 struct strbuf title;
554 strbuf_init(&title, 80);
556 for (;;) {
557 const char *line = *msg_p;
558 int linelen = get_one_line(line);
560 *msg_p += linelen;
561 if (!linelen || is_empty_line(line, &linelen))
562 break;
564 strbuf_grow(&title, linelen + 2);
565 if (title.len) {
566 if (fmt == CMIT_FMT_EMAIL) {
567 strbuf_addch(&title, '\n');
569 strbuf_addch(&title, ' ');
571 strbuf_add(&title, line, linelen);
574 strbuf_grow(sb, title.len + 1024);
575 if (subject) {
576 strbuf_addstr(sb, subject);
577 add_rfc2047(sb, title.buf, title.len, encoding);
578 } else {
579 strbuf_addbuf(sb, &title);
581 strbuf_addch(sb, '\n');
583 if (plain_non_ascii) {
584 const char *header_fmt =
585 "MIME-Version: 1.0\n"
586 "Content-Type: text/plain; charset=%s\n"
587 "Content-Transfer-Encoding: 8bit\n";
588 strbuf_addf(sb, header_fmt, encoding);
590 if (after_subject) {
591 strbuf_addstr(sb, after_subject);
593 if (fmt == CMIT_FMT_EMAIL) {
594 strbuf_addch(sb, '\n');
596 strbuf_release(&title);
599 static void pp_remainder(enum cmit_fmt fmt,
600 const char **msg_p,
601 struct strbuf *sb,
602 int indent)
604 int first = 1;
605 for (;;) {
606 const char *line = *msg_p;
607 int linelen = get_one_line(line);
608 *msg_p += linelen;
610 if (!linelen)
611 break;
613 if (is_empty_line(line, &linelen)) {
614 if (first)
615 continue;
616 if (fmt == CMIT_FMT_SHORT)
617 break;
619 first = 0;
621 strbuf_grow(sb, linelen + indent + 20);
622 if (indent) {
623 memset(sb->buf + sb->len, ' ', indent);
624 strbuf_setlen(sb, sb->len + indent);
626 strbuf_add(sb, line, linelen);
627 strbuf_addch(sb, '\n');
631 void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
632 struct strbuf *sb, int abbrev,
633 const char *subject, const char *after_subject,
634 enum date_mode dmode, int plain_non_ascii)
636 unsigned long beginning_of_body;
637 int indent = 4;
638 const char *msg = commit->buffer;
639 char *reencoded;
640 const char *encoding;
642 if (fmt == CMIT_FMT_USERFORMAT) {
643 format_commit_message(commit, user_format, sb);
644 return;
647 encoding = (git_log_output_encoding
648 ? git_log_output_encoding
649 : git_commit_encoding);
650 if (!encoding)
651 encoding = "utf-8";
652 reencoded = logmsg_reencode(commit, encoding);
653 if (reencoded) {
654 msg = reencoded;
657 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
658 indent = 0;
660 /* After-subject is used to pass in Content-Type: multipart
661 * MIME header; in that case we do not have to do the
662 * plaintext content type even if the commit message has
663 * non 7-bit ASCII character. Otherwise, check if we need
664 * to say this is not a 7-bit ASCII.
666 if (fmt == CMIT_FMT_EMAIL && !after_subject) {
667 int i, ch, in_body;
669 for (in_body = i = 0; (ch = msg[i]); i++) {
670 if (!in_body) {
671 /* author could be non 7-bit ASCII but
672 * the log may be so; skip over the
673 * header part first.
675 if (ch == '\n' && msg[i+1] == '\n')
676 in_body = 1;
678 else if (non_ascii(ch)) {
679 plain_non_ascii = 1;
680 break;
685 pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
686 if (fmt != CMIT_FMT_ONELINE && !subject) {
687 strbuf_addch(sb, '\n');
690 /* Skip excess blank lines at the beginning of body, if any... */
691 for (;;) {
692 int linelen = get_one_line(msg);
693 int ll = linelen;
694 if (!linelen)
695 break;
696 if (!is_empty_line(msg, &ll))
697 break;
698 msg += linelen;
701 /* These formats treat the title line specially. */
702 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
703 pp_title_line(fmt, &msg, sb, subject,
704 after_subject, encoding, plain_non_ascii);
706 beginning_of_body = sb->len;
707 if (fmt != CMIT_FMT_ONELINE)
708 pp_remainder(fmt, &msg, sb, indent);
709 strbuf_rtrim(sb);
711 /* Make sure there is an EOLN for the non-oneline case */
712 if (fmt != CMIT_FMT_ONELINE)
713 strbuf_addch(sb, '\n');
716 * The caller may append additional body text in e-mail
717 * format. Make sure we did not strip the blank line
718 * between the header and the body.
720 if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
721 strbuf_addch(sb, '\n');
722 free(reencoded);