--pretty=format: parse commit message only once
[git/gitweb.git] / pretty.c
blob17a3010a6ecf315f75d7d6c970ac4b3d1877d168
1 #include "cache.h"
2 #include "commit.h"
3 #include "utf8.h"
4 #include "diff.h"
5 #include "revision.h"
7 static struct cmt_fmt_map {
8 const char *n;
9 size_t cmp_len;
10 enum cmit_fmt v;
11 } cmt_fmts[] = {
12 { "raw", 1, CMIT_FMT_RAW },
13 { "medium", 1, CMIT_FMT_MEDIUM },
14 { "short", 1, CMIT_FMT_SHORT },
15 { "email", 1, CMIT_FMT_EMAIL },
16 { "full", 5, CMIT_FMT_FULL },
17 { "fuller", 5, CMIT_FMT_FULLER },
18 { "oneline", 1, CMIT_FMT_ONELINE },
19 { "format:", 7, CMIT_FMT_USERFORMAT},
22 static char *user_format;
24 enum cmit_fmt get_commit_format(const char *arg)
26 int i;
28 if (!arg || !*arg)
29 return CMIT_FMT_DEFAULT;
30 if (*arg == '=')
31 arg++;
32 if (!prefixcmp(arg, "format:")) {
33 if (user_format)
34 free(user_format);
35 user_format = xstrdup(arg + 7);
36 return CMIT_FMT_USERFORMAT;
38 for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
39 if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
40 !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
41 return cmt_fmts[i].v;
44 die("invalid --pretty format: %s", arg);
/*
 * Generic support for pretty-printing the header
 */

/*
 * Return the length of the first line of msg, counting its terminating
 * newline when there is one.  Returns 0 only for an empty string.
 */
static int get_one_line(const char *msg)
{
	int len = 0;

	while (msg[len]) {
		if (msg[len++] == '\n')
			break;
	}
	return len;
}
/*
 * High bit set, or ISO-2022-INT
 *
 * Only the low eight bits of ch are examined.  Returns 1 when that byte
 * is ESC (0x1b) or has bit 7 set, 0 otherwise.
 */
int non_ascii(int ch)
{
	const int byte = ch & 0xff;

	if (byte == 0x1b)
		return 1;
	return (byte & 0x80) != 0;
}
/*
 * Return 1 when ch is '=', '?', '_' or a byte that non_ascii() flags,
 * 0 otherwise.  add_rfc2047() writes such bytes as "=XX".
 */
static int is_rfc2047_special(char ch)
{
	switch (ch) {
	case '=':
	case '?':
	case '_':
		return 1;
	default:
		return non_ascii(ch) != 0;
	}
}
/*
 * Append the first len bytes of line to sb, wrapping them as
 * "=?<encoding>?q?...?=" when needed.
 *
 * The text is copied verbatim unless it contains a byte flagged by
 * non_ascii() or the two-character sequence "=?".  When quoting, every
 * byte accepted by is_rfc2047_special() and every space is written as
 * "=XX" (upper-case hex); all other bytes are copied unchanged.
 */
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
			const char *encoding)
{
	int pos, copied;
	int must_quote = 0;

	for (pos = 0; pos < len && !must_quote; pos++) {
		int ch = line[pos];

		if (non_ascii(ch))
			must_quote = 1;
		else if (ch == '=' && pos + 1 < len && line[pos + 1] == '?')
			must_quote = 1;
	}
	if (!must_quote) {
		strbuf_add(sb, line, len);
		return;
	}

	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
	strbuf_addf(sb, "=?%s?q?", encoding);
	copied = 0;
	for (pos = 0; pos < len; pos++) {
		unsigned ch = line[pos] & 0xFF;

		/*
		 * We encode ' ' using '=20' even though rfc2047
		 * allows using '_' for readability.  Unfortunately,
		 * many programs do not understand this and just
		 * leave the underscore in place.
		 */
		if (ch != ' ' && !is_rfc2047_special(ch))
			continue;
		/* flush the literal run before this byte, then escape it */
		strbuf_add(sb, line + copied, pos - copied);
		strbuf_addf(sb, "=%02X", ch);
		copied = pos + 1;
	}
	strbuf_add(sb, line + copied, len - copied);
	strbuf_addstr(sb, "?=");
}
113 static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
114 const char *line, enum date_mode dmode,
115 const char *encoding)
117 char *date;
118 int namelen;
119 unsigned long time;
120 int tz;
121 const char *filler = " ";
123 if (fmt == CMIT_FMT_ONELINE)
124 return;
125 date = strchr(line, '>');
126 if (!date)
127 return;
128 namelen = ++date - line;
129 time = strtoul(date, &date, 10);
130 tz = strtol(date, NULL, 10);
132 if (fmt == CMIT_FMT_EMAIL) {
133 char *name_tail = strchr(line, '<');
134 int display_name_length;
135 if (!name_tail)
136 return;
137 while (line < name_tail && isspace(name_tail[-1]))
138 name_tail--;
139 display_name_length = name_tail - line;
140 filler = "";
141 strbuf_addstr(sb, "From: ");
142 add_rfc2047(sb, line, display_name_length, encoding);
143 strbuf_add(sb, name_tail, namelen - display_name_length);
144 strbuf_addch(sb, '\n');
145 } else {
146 strbuf_addf(sb, "%s: %.*s%.*s\n", what,
147 (fmt == CMIT_FMT_FULLER) ? 4 : 0,
148 filler, namelen, line);
150 switch (fmt) {
151 case CMIT_FMT_MEDIUM:
152 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, dmode));
153 break;
154 case CMIT_FMT_EMAIL:
155 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
156 break;
157 case CMIT_FMT_FULLER:
158 strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
159 break;
160 default:
161 /* notin' */
162 break;
/*
 * Strip trailing whitespace from a line of *len_p bytes by shrinking
 * *len_p; the bytes themselves are not modified.  Returns non-zero when
 * nothing but whitespace was there, i.e. the trimmed length is 0.
 */
static int is_empty_line(const char *line, int *len_p)
{
	int remaining;

	for (remaining = *len_p; remaining; remaining--) {
		if (!isspace(line[remaining - 1]))
			break;
	}
	*len_p = remaining;
	return !remaining;
}
175 static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
176 const struct commit *commit, int abbrev)
178 struct commit_list *parent = commit->parents;
180 if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
181 !parent || !parent->next)
182 return;
184 strbuf_addstr(sb, "Merge:");
186 while (parent) {
187 struct commit *p = parent->item;
188 const char *hex = NULL;
189 const char *dots;
190 if (abbrev)
191 hex = find_unique_abbrev(p->object.sha1, abbrev);
192 if (!hex)
193 hex = sha1_to_hex(p->object.sha1);
194 dots = (abbrev && strlen(hex) != 40) ? "..." : "";
195 parent = parent->next;
197 strbuf_addf(sb, " %s%s", hex, dots);
199 strbuf_addch(sb, '\n');
202 static char *get_header(const struct commit *commit, const char *key)
204 int key_len = strlen(key);
205 const char *line = commit->buffer;
207 for (;;) {
208 const char *eol = strchr(line, '\n'), *next;
210 if (line == eol)
211 return NULL;
212 if (!eol) {
213 eol = line + strlen(line);
214 next = NULL;
215 } else
216 next = eol + 1;
217 if (eol - line > key_len &&
218 !strncmp(line, key, key_len) &&
219 line[key_len] == ' ') {
220 return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
222 line = next;
226 static char *replace_encoding_header(char *buf, const char *encoding)
228 struct strbuf tmp;
229 size_t start, len;
230 char *cp = buf;
232 /* guess if there is an encoding header before a \n\n */
233 while (strncmp(cp, "encoding ", strlen("encoding "))) {
234 cp = strchr(cp, '\n');
235 if (!cp || *++cp == '\n')
236 return buf;
238 start = cp - buf;
239 cp = strchr(cp, '\n');
240 if (!cp)
241 return buf; /* should not happen but be defensive */
242 len = cp + 1 - (buf + start);
244 strbuf_init(&tmp, 0);
245 strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
246 if (is_encoding_utf8(encoding)) {
247 /* we have re-coded to UTF-8; drop the header */
248 strbuf_remove(&tmp, start, len);
249 } else {
250 /* just replaces XXXX in 'encoding XXXX\n' */
251 strbuf_splice(&tmp, start + strlen("encoding "),
252 len - strlen("encoding \n"),
253 encoding, strlen(encoding));
255 return strbuf_detach(&tmp, NULL);
258 static char *logmsg_reencode(const struct commit *commit,
259 const char *output_encoding)
261 static const char *utf8 = "utf-8";
262 const char *use_encoding;
263 char *encoding;
264 char *out;
266 if (!*output_encoding)
267 return NULL;
268 encoding = get_header(commit, "encoding");
269 use_encoding = encoding ? encoding : utf8;
270 if (!strcmp(use_encoding, output_encoding))
271 if (encoding) /* we'll strip encoding header later */
272 out = xstrdup(commit->buffer);
273 else
274 return NULL; /* nothing to do */
275 else
276 out = reencode_string(commit->buffer,
277 output_encoding, use_encoding);
278 if (out)
279 out = replace_encoding_header(out, output_encoding);
281 free(encoding);
282 return out;
285 static void format_person_part(struct strbuf *sb, char part,
286 const char *msg, int len)
288 int start, end, tz = 0;
289 unsigned long date;
290 char *ep;
292 /* parse name */
293 for (end = 0; end < len && msg[end] != '<'; end++)
294 ; /* do nothing */
295 start = end + 1;
296 while (end > 0 && isspace(msg[end - 1]))
297 end--;
298 if (part == 'n') { /* name */
299 strbuf_add(sb, msg, end);
300 return;
303 if (start >= len)
304 return;
306 /* parse email */
307 for (end = start + 1; end < len && msg[end] != '>'; end++)
308 ; /* do nothing */
310 if (end >= len)
311 return;
313 if (part == 'e') { /* email */
314 strbuf_add(sb, msg + start, end - start);
315 return;
318 /* parse date */
319 for (start = end + 1; start < len && isspace(msg[start]); start++)
320 ; /* do nothing */
321 if (start >= len)
322 return;
323 date = strtoul(msg + start, &ep, 10);
324 if (msg + start == ep)
325 return;
327 if (part == 't') { /* date, UNIX timestamp */
328 strbuf_add(sb, msg + start, ep - (msg + start));
329 return;
332 /* parse tz */
333 for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
334 ; /* do nothing */
335 if (start + 1 < len) {
336 tz = strtoul(msg + start + 1, NULL, 10);
337 if (msg[start] == '-')
338 tz = -tz;
341 switch (part) {
342 case 'd': /* date */
343 strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
344 return;
345 case 'D': /* date, RFC2822 style */
346 strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
347 return;
348 case 'r': /* date, relative */
349 strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
350 return;
351 case 'i': /* date, ISO 8601 */
352 strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
353 return;
/* A run of bytes inside a buffer, identified by where it starts and how long it is. */
struct chunk {
	size_t off;	/* offset of the first byte */
	size_t len;	/* number of bytes */
};
362 struct format_commit_context {
363 const struct commit *commit;
365 /* These offsets are relative to the start of the commit message. */
366 int commit_header_parsed;
367 struct chunk subject;
368 struct chunk author;
369 struct chunk committer;
370 struct chunk encoding;
371 size_t body_off;
374 static void parse_commit_header(struct format_commit_context *context)
376 const char *msg = context->commit->buffer;
377 int i;
378 enum { HEADER, SUBJECT, BODY } state;
380 for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
381 int eol;
382 for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
383 ; /* do nothing */
385 if (state == SUBJECT) {
386 context->subject.off = i;
387 context->subject.len = eol - i;
388 i = eol;
390 if (i == eol) {
391 state++;
392 /* strip empty lines */
393 while (msg[eol + 1] == '\n')
394 eol++;
395 } else if (!prefixcmp(msg + i, "author ")) {
396 context->author.off = i + 7;
397 context->author.len = eol - i - 7;
398 } else if (!prefixcmp(msg + i, "committer ")) {
399 context->committer.off = i + 10;
400 context->committer.len = eol - i - 10;
401 } else if (!prefixcmp(msg + i, "encoding ")) {
402 context->encoding.off = i + 9;
403 context->encoding.len = eol - i - 9;
405 i = eol;
407 context->body_off = i;
408 context->commit_header_parsed = 1;
411 static void format_commit_item(struct strbuf *sb, const char *placeholder,
412 void *context)
414 struct format_commit_context *c = context;
415 const struct commit *commit = c->commit;
416 const char *msg = commit->buffer;
417 struct commit_list *p;
419 /* these are independent of the commit */
420 switch (placeholder[0]) {
421 case 'C':
422 switch (placeholder[3]) {
423 case 'd': /* red */
424 strbuf_addstr(sb, "\033[31m");
425 return;
426 case 'e': /* green */
427 strbuf_addstr(sb, "\033[32m");
428 return;
429 case 'u': /* blue */
430 strbuf_addstr(sb, "\033[34m");
431 return;
432 case 's': /* reset color */
433 strbuf_addstr(sb, "\033[m");
434 return;
436 case 'n': /* newline */
437 strbuf_addch(sb, '\n');
438 return;
441 /* these depend on the commit */
442 if (!commit->object.parsed)
443 parse_object(commit->object.sha1);
445 switch (placeholder[0]) {
446 case 'H': /* commit hash */
447 strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
448 return;
449 case 'h': /* abbreviated commit hash */
450 strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
451 DEFAULT_ABBREV));
452 return;
453 case 'T': /* tree hash */
454 strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
455 return;
456 case 't': /* abbreviated tree hash */
457 strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
458 DEFAULT_ABBREV));
459 return;
460 case 'P': /* parent hashes */
461 for (p = commit->parents; p; p = p->next) {
462 if (p != commit->parents)
463 strbuf_addch(sb, ' ');
464 strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
466 return;
467 case 'p': /* abbreviated parent hashes */
468 for (p = commit->parents; p; p = p->next) {
469 if (p != commit->parents)
470 strbuf_addch(sb, ' ');
471 strbuf_addstr(sb, find_unique_abbrev(
472 p->item->object.sha1, DEFAULT_ABBREV));
474 return;
475 case 'm': /* left/right/bottom */
476 strbuf_addch(sb, (commit->object.flags & BOUNDARY)
477 ? '-'
478 : (commit->object.flags & SYMMETRIC_LEFT)
479 ? '<'
480 : '>');
481 return;
484 /* For the rest we have to parse the commit header. */
485 if (!c->commit_header_parsed)
486 parse_commit_header(c);
488 switch (placeholder[0]) {
489 case 's':
490 strbuf_add(sb, msg + c->subject.off, c->subject.len);
491 return;
492 case 'a':
493 format_person_part(sb, placeholder[1],
494 msg + c->author.off, c->author.len);
495 return;
496 case 'c':
497 format_person_part(sb, placeholder[1],
498 msg + c->committer.off, c->committer.len);
499 return;
500 case 'e':
501 strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
502 return;
503 case 'b':
504 strbuf_addstr(sb, msg + c->body_off);
505 return;
509 void format_commit_message(const struct commit *commit,
510 const void *format, struct strbuf *sb)
512 const char *placeholders[] = {
513 "H", /* commit hash */
514 "h", /* abbreviated commit hash */
515 "T", /* tree hash */
516 "t", /* abbreviated tree hash */
517 "P", /* parent hashes */
518 "p", /* abbreviated parent hashes */
519 "an", /* author name */
520 "ae", /* author email */
521 "ad", /* author date */
522 "aD", /* author date, RFC2822 style */
523 "ar", /* author date, relative */
524 "at", /* author date, UNIX timestamp */
525 "ai", /* author date, ISO 8601 */
526 "cn", /* committer name */
527 "ce", /* committer email */
528 "cd", /* committer date */
529 "cD", /* committer date, RFC2822 style */
530 "cr", /* committer date, relative */
531 "ct", /* committer date, UNIX timestamp */
532 "ci", /* committer date, ISO 8601 */
533 "e", /* encoding */
534 "s", /* subject */
535 "b", /* body */
536 "Cred", /* red */
537 "Cgreen", /* green */
538 "Cblue", /* blue */
539 "Creset", /* reset color */
540 "n", /* newline */
541 "m", /* left/right/bottom */
542 NULL
544 struct format_commit_context context;
546 memset(&context, 0, sizeof(context));
547 context.commit = commit;
548 strbuf_expand(sb, format, placeholders, format_commit_item, &context);
551 static void pp_header(enum cmit_fmt fmt,
552 int abbrev,
553 enum date_mode dmode,
554 const char *encoding,
555 const struct commit *commit,
556 const char **msg_p,
557 struct strbuf *sb)
559 int parents_shown = 0;
561 for (;;) {
562 const char *line = *msg_p;
563 int linelen = get_one_line(*msg_p);
565 if (!linelen)
566 return;
567 *msg_p += linelen;
569 if (linelen == 1)
570 /* End of header */
571 return;
573 if (fmt == CMIT_FMT_RAW) {
574 strbuf_add(sb, line, linelen);
575 continue;
578 if (!memcmp(line, "parent ", 7)) {
579 if (linelen != 48)
580 die("bad parent line in commit");
581 continue;
584 if (!parents_shown) {
585 struct commit_list *parent;
586 int num;
587 for (parent = commit->parents, num = 0;
588 parent;
589 parent = parent->next, num++)
591 /* with enough slop */
592 strbuf_grow(sb, num * 50 + 20);
593 add_merge_info(fmt, sb, commit, abbrev);
594 parents_shown = 1;
598 * MEDIUM == DEFAULT shows only author with dates.
599 * FULL shows both authors but not dates.
600 * FULLER shows both authors and dates.
602 if (!memcmp(line, "author ", 7)) {
603 strbuf_grow(sb, linelen + 80);
604 add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
606 if (!memcmp(line, "committer ", 10) &&
607 (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
608 strbuf_grow(sb, linelen + 80);
609 add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
614 static void pp_title_line(enum cmit_fmt fmt,
615 const char **msg_p,
616 struct strbuf *sb,
617 const char *subject,
618 const char *after_subject,
619 const char *encoding,
620 int plain_non_ascii)
622 struct strbuf title;
624 strbuf_init(&title, 80);
626 for (;;) {
627 const char *line = *msg_p;
628 int linelen = get_one_line(line);
630 *msg_p += linelen;
631 if (!linelen || is_empty_line(line, &linelen))
632 break;
634 strbuf_grow(&title, linelen + 2);
635 if (title.len) {
636 if (fmt == CMIT_FMT_EMAIL) {
637 strbuf_addch(&title, '\n');
639 strbuf_addch(&title, ' ');
641 strbuf_add(&title, line, linelen);
644 strbuf_grow(sb, title.len + 1024);
645 if (subject) {
646 strbuf_addstr(sb, subject);
647 add_rfc2047(sb, title.buf, title.len, encoding);
648 } else {
649 strbuf_addbuf(sb, &title);
651 strbuf_addch(sb, '\n');
653 if (plain_non_ascii) {
654 const char *header_fmt =
655 "MIME-Version: 1.0\n"
656 "Content-Type: text/plain; charset=%s\n"
657 "Content-Transfer-Encoding: 8bit\n";
658 strbuf_addf(sb, header_fmt, encoding);
660 if (after_subject) {
661 strbuf_addstr(sb, after_subject);
663 if (fmt == CMIT_FMT_EMAIL) {
664 strbuf_addch(sb, '\n');
666 strbuf_release(&title);
669 static void pp_remainder(enum cmit_fmt fmt,
670 const char **msg_p,
671 struct strbuf *sb,
672 int indent)
674 int first = 1;
675 for (;;) {
676 const char *line = *msg_p;
677 int linelen = get_one_line(line);
678 *msg_p += linelen;
680 if (!linelen)
681 break;
683 if (is_empty_line(line, &linelen)) {
684 if (first)
685 continue;
686 if (fmt == CMIT_FMT_SHORT)
687 break;
689 first = 0;
691 strbuf_grow(sb, linelen + indent + 20);
692 if (indent) {
693 memset(sb->buf + sb->len, ' ', indent);
694 strbuf_setlen(sb, sb->len + indent);
696 strbuf_add(sb, line, linelen);
697 strbuf_addch(sb, '\n');
701 void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
702 struct strbuf *sb, int abbrev,
703 const char *subject, const char *after_subject,
704 enum date_mode dmode, int plain_non_ascii)
706 unsigned long beginning_of_body;
707 int indent = 4;
708 const char *msg = commit->buffer;
709 char *reencoded;
710 const char *encoding;
712 if (fmt == CMIT_FMT_USERFORMAT) {
713 format_commit_message(commit, user_format, sb);
714 return;
717 encoding = (git_log_output_encoding
718 ? git_log_output_encoding
719 : git_commit_encoding);
720 if (!encoding)
721 encoding = "utf-8";
722 reencoded = logmsg_reencode(commit, encoding);
723 if (reencoded) {
724 msg = reencoded;
727 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
728 indent = 0;
730 /* After-subject is used to pass in Content-Type: multipart
731 * MIME header; in that case we do not have to do the
732 * plaintext content type even if the commit message has
733 * non 7-bit ASCII character. Otherwise, check if we need
734 * to say this is not a 7-bit ASCII.
736 if (fmt == CMIT_FMT_EMAIL && !after_subject) {
737 int i, ch, in_body;
739 for (in_body = i = 0; (ch = msg[i]); i++) {
740 if (!in_body) {
741 /* author could be non 7-bit ASCII but
742 * the log may be so; skip over the
743 * header part first.
745 if (ch == '\n' && msg[i+1] == '\n')
746 in_body = 1;
748 else if (non_ascii(ch)) {
749 plain_non_ascii = 1;
750 break;
755 pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
756 if (fmt != CMIT_FMT_ONELINE && !subject) {
757 strbuf_addch(sb, '\n');
760 /* Skip excess blank lines at the beginning of body, if any... */
761 for (;;) {
762 int linelen = get_one_line(msg);
763 int ll = linelen;
764 if (!linelen)
765 break;
766 if (!is_empty_line(msg, &ll))
767 break;
768 msg += linelen;
771 /* These formats treat the title line specially. */
772 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
773 pp_title_line(fmt, &msg, sb, subject,
774 after_subject, encoding, plain_non_ascii);
776 beginning_of_body = sb->len;
777 if (fmt != CMIT_FMT_ONELINE)
778 pp_remainder(fmt, &msg, sb, indent);
779 strbuf_rtrim(sb);
781 /* Make sure there is an EOLN for the non-oneline case */
782 if (fmt != CMIT_FMT_ONELINE)
783 strbuf_addch(sb, '\n');
786 * The caller may append additional body text in e-mail
787 * format. Make sure we did not strip the blank line
788 * between the header and the body.
790 if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
791 strbuf_addch(sb, '\n');
792 free(reencoded);