--pretty=format: on-demand format expansion
[git/jnareb-git.git] / pretty.c
blob9fbd73f748c6cd250b5e9534168072a1cea88a85
1 #include "cache.h"
2 #include "commit.h"
3 #include "utf8.h"
4 #include "diff.h"
5 #include "revision.h"
7 static struct cmt_fmt_map {
8 const char *n;
9 size_t cmp_len;
10 enum cmit_fmt v;
11 } cmt_fmts[] = {
12 { "raw", 1, CMIT_FMT_RAW },
13 { "medium", 1, CMIT_FMT_MEDIUM },
14 { "short", 1, CMIT_FMT_SHORT },
15 { "email", 1, CMIT_FMT_EMAIL },
16 { "full", 5, CMIT_FMT_FULL },
17 { "fuller", 5, CMIT_FMT_FULLER },
18 { "oneline", 1, CMIT_FMT_ONELINE },
19 { "format:", 7, CMIT_FMT_USERFORMAT},
22 static char *user_format;
24 enum cmit_fmt get_commit_format(const char *arg)
26 int i;
28 if (!arg || !*arg)
29 return CMIT_FMT_DEFAULT;
30 if (*arg == '=')
31 arg++;
32 if (!prefixcmp(arg, "format:")) {
33 if (user_format)
34 free(user_format);
35 user_format = xstrdup(arg + 7);
36 return CMIT_FMT_USERFORMAT;
38 for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
39 if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
40 !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
41 return cmt_fmts[i].v;
44 die("invalid --pretty format: %s", arg);
/*
 * Generic support for pretty-printing the header
 */

/*
 * Return the number of bytes in the first line of msg, including the
 * terminating newline when there is one.  Returns 0 only when msg is
 * the empty string.
 */
static int get_one_line(const char *msg)
{
	const char *p = msg;

	while (*p) {
		if (*p++ == '\n')
			break;
	}
	return (int)(p - msg);
}
/*
 * High bit set, or ISO-2022-INT
 *
 * Only the low eight bits of ch are examined.  Returns 1 when that
 * byte has bit 7 set or equals 0x1b (ESC), 0 otherwise.
 */
int non_ascii(int ch)
{
	const int byte = ch & 0xff;

	if (byte & 0x80)
		return 1;
	return byte == 0x1b;
}
/*
 * Return 1 when ch is '=', '?' or '_', or when non_ascii() flags it;
 * 0 otherwise.  add_rfc2047() uses this to decide which bytes to
 * write as "=XX" escapes.
 */
static int is_rfc2047_special(char ch)
{
	switch (ch) {
	case '=':
	case '?':
	case '_':
		return 1;
	default:
		return non_ascii(ch);
	}
}
/*
 * Append the len bytes at line to sb, wrapping them in an
 * "=?<encoding>?q?...?=" encoded word when needed.
 *
 * The text is copied verbatim unless it contains a byte for which
 * non_ascii() is true, or the two-byte sequence "=?".  Otherwise every
 * byte that is_rfc2047_special() flags, and every space, is written as
 * "=XX" (two upper-case hex digits); all other bytes are copied as-is.
 */
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
		       const char *encoding)
{
	int pos, pending;
	int needs_quoting = 0;

	for (pos = 0; pos < len && !needs_quoting; pos++) {
		if (non_ascii(line[pos]))
			needs_quoting = 1;
		else if (line[pos] == '=' &&
			 pos + 1 < len && line[pos + 1] == '?')
			needs_quoting = 1;
	}

	if (!needs_quoting) {
		strbuf_add(sb, line, len);
		return;
	}

	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
	strbuf_addf(sb, "=?%s?q?", encoding);

	/* "pending" is the start of the not-yet-copied literal run */
	pending = 0;
	for (pos = 0; pos < len; pos++) {
		unsigned byte = line[pos] & 0xFF;

		/*
		 * We encode ' ' using '=20' even though rfc2047
		 * allows using '_' for readability.  Unfortunately,
		 * many programs do not understand this and just
		 * leave the underscore in place.
		 */
		if (!is_rfc2047_special(byte) && byte != ' ')
			continue;

		strbuf_add(sb, line + pending, pos - pending);
		strbuf_addf(sb, "=%02X", byte);
		pending = pos + 1;
	}
	strbuf_add(sb, line + pending, len - pending);
	strbuf_addstr(sb, "?=");
}
113 static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
114 const char *line, enum date_mode dmode,
115 const char *encoding)
117 char *date;
118 int namelen;
119 unsigned long time;
120 int tz;
121 const char *filler = " ";
123 if (fmt == CMIT_FMT_ONELINE)
124 return;
125 date = strchr(line, '>');
126 if (!date)
127 return;
128 namelen = ++date - line;
129 time = strtoul(date, &date, 10);
130 tz = strtol(date, NULL, 10);
132 if (fmt == CMIT_FMT_EMAIL) {
133 char *name_tail = strchr(line, '<');
134 int display_name_length;
135 if (!name_tail)
136 return;
137 while (line < name_tail && isspace(name_tail[-1]))
138 name_tail--;
139 display_name_length = name_tail - line;
140 filler = "";
141 strbuf_addstr(sb, "From: ");
142 add_rfc2047(sb, line, display_name_length, encoding);
143 strbuf_add(sb, name_tail, namelen - display_name_length);
144 strbuf_addch(sb, '\n');
145 } else {
146 strbuf_addf(sb, "%s: %.*s%.*s\n", what,
147 (fmt == CMIT_FMT_FULLER) ? 4 : 0,
148 filler, namelen, line);
150 switch (fmt) {
151 case CMIT_FMT_MEDIUM:
152 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, dmode));
153 break;
154 case CMIT_FMT_EMAIL:
155 strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
156 break;
157 case CMIT_FMT_FULLER:
158 strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
159 break;
160 default:
161 /* notin' */
162 break;
/*
 * Strip trailing whitespace from a line of *len_p bytes by shrinking
 * *len_p; the bytes themselves are not modified.  Returns 1 when
 * nothing is left (the line was empty or all whitespace), 0 otherwise.
 */
static int is_empty_line(const char *line, int *len_p)
{
	int remaining;

	for (remaining = *len_p; remaining; remaining--) {
		if (!isspace(line[remaining - 1]))
			break;
	}
	*len_p = remaining;
	return !remaining;
}
175 static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
176 const struct commit *commit, int abbrev)
178 struct commit_list *parent = commit->parents;
180 if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
181 !parent || !parent->next)
182 return;
184 strbuf_addstr(sb, "Merge:");
186 while (parent) {
187 struct commit *p = parent->item;
188 const char *hex = NULL;
189 const char *dots;
190 if (abbrev)
191 hex = find_unique_abbrev(p->object.sha1, abbrev);
192 if (!hex)
193 hex = sha1_to_hex(p->object.sha1);
194 dots = (abbrev && strlen(hex) != 40) ? "..." : "";
195 parent = parent->next;
197 strbuf_addf(sb, " %s%s", hex, dots);
199 strbuf_addch(sb, '\n');
202 static char *get_header(const struct commit *commit, const char *key)
204 int key_len = strlen(key);
205 const char *line = commit->buffer;
207 for (;;) {
208 const char *eol = strchr(line, '\n'), *next;
210 if (line == eol)
211 return NULL;
212 if (!eol) {
213 eol = line + strlen(line);
214 next = NULL;
215 } else
216 next = eol + 1;
217 if (eol - line > key_len &&
218 !strncmp(line, key, key_len) &&
219 line[key_len] == ' ') {
220 return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
222 line = next;
226 static char *replace_encoding_header(char *buf, const char *encoding)
228 struct strbuf tmp;
229 size_t start, len;
230 char *cp = buf;
232 /* guess if there is an encoding header before a \n\n */
233 while (strncmp(cp, "encoding ", strlen("encoding "))) {
234 cp = strchr(cp, '\n');
235 if (!cp || *++cp == '\n')
236 return buf;
238 start = cp - buf;
239 cp = strchr(cp, '\n');
240 if (!cp)
241 return buf; /* should not happen but be defensive */
242 len = cp + 1 - (buf + start);
244 strbuf_init(&tmp, 0);
245 strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
246 if (is_encoding_utf8(encoding)) {
247 /* we have re-coded to UTF-8; drop the header */
248 strbuf_remove(&tmp, start, len);
249 } else {
250 /* just replaces XXXX in 'encoding XXXX\n' */
251 strbuf_splice(&tmp, start + strlen("encoding "),
252 len - strlen("encoding \n"),
253 encoding, strlen(encoding));
255 return strbuf_detach(&tmp, NULL);
258 static char *logmsg_reencode(const struct commit *commit,
259 const char *output_encoding)
261 static const char *utf8 = "utf-8";
262 const char *use_encoding;
263 char *encoding;
264 char *out;
266 if (!*output_encoding)
267 return NULL;
268 encoding = get_header(commit, "encoding");
269 use_encoding = encoding ? encoding : utf8;
270 if (!strcmp(use_encoding, output_encoding))
271 if (encoding) /* we'll strip encoding header later */
272 out = xstrdup(commit->buffer);
273 else
274 return NULL; /* nothing to do */
275 else
276 out = reencode_string(commit->buffer,
277 output_encoding, use_encoding);
278 if (out)
279 out = replace_encoding_header(out, output_encoding);
281 free(encoding);
282 return out;
285 static void format_person_part(struct strbuf *sb, char part,
286 const char *msg, int len)
288 int start, end, tz = 0;
289 unsigned long date;
290 char *ep;
292 /* parse name */
293 for (end = 0; end < len && msg[end] != '<'; end++)
294 ; /* do nothing */
295 start = end + 1;
296 while (end > 0 && isspace(msg[end - 1]))
297 end--;
298 if (part == 'n') { /* name */
299 strbuf_add(sb, msg, end);
300 return;
303 if (start >= len)
304 return;
306 /* parse email */
307 for (end = start + 1; end < len && msg[end] != '>'; end++)
308 ; /* do nothing */
310 if (end >= len)
311 return;
313 if (part == 'e') { /* email */
314 strbuf_add(sb, msg + start, end - start);
315 return;
318 /* parse date */
319 for (start = end + 1; start < len && isspace(msg[start]); start++)
320 ; /* do nothing */
321 if (start >= len)
322 return;
323 date = strtoul(msg + start, &ep, 10);
324 if (msg + start == ep)
325 return;
327 if (part == 't') { /* date, UNIX timestamp */
328 strbuf_add(sb, msg + start, ep - (msg + start));
329 return;
332 /* parse tz */
333 for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
334 ; /* do nothing */
335 if (start + 1 < len) {
336 tz = strtoul(msg + start + 1, NULL, 10);
337 if (msg[start] == '-')
338 tz = -tz;
341 switch (part) {
342 case 'd': /* date */
343 strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
344 return;
345 case 'D': /* date, RFC2822 style */
346 strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
347 return;
348 case 'r': /* date, relative */
349 strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
350 return;
351 case 'i': /* date, ISO 8601 */
352 strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
353 return;
357 static void format_commit_item(struct strbuf *sb, const char *placeholder,
358 void *context)
360 const struct commit *commit = context;
361 struct commit_list *p;
362 int i;
363 enum { HEADER, SUBJECT, BODY } state;
364 const char *msg = commit->buffer;
366 /* these are independent of the commit */
367 switch (placeholder[0]) {
368 case 'C':
369 switch (placeholder[3]) {
370 case 'd': /* red */
371 strbuf_addstr(sb, "\033[31m");
372 return;
373 case 'e': /* green */
374 strbuf_addstr(sb, "\033[32m");
375 return;
376 case 'u': /* blue */
377 strbuf_addstr(sb, "\033[34m");
378 return;
379 case 's': /* reset color */
380 strbuf_addstr(sb, "\033[m");
381 return;
383 case 'n': /* newline */
384 strbuf_addch(sb, '\n');
385 return;
388 /* these depend on the commit */
389 if (!commit->object.parsed)
390 parse_object(commit->object.sha1);
392 switch (placeholder[0]) {
393 case 'H': /* commit hash */
394 strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
395 return;
396 case 'h': /* abbreviated commit hash */
397 strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
398 DEFAULT_ABBREV));
399 return;
400 case 'T': /* tree hash */
401 strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
402 return;
403 case 't': /* abbreviated tree hash */
404 strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
405 DEFAULT_ABBREV));
406 return;
407 case 'P': /* parent hashes */
408 for (p = commit->parents; p; p = p->next) {
409 if (p != commit->parents)
410 strbuf_addch(sb, ' ');
411 strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
413 return;
414 case 'p': /* abbreviated parent hashes */
415 for (p = commit->parents; p; p = p->next) {
416 if (p != commit->parents)
417 strbuf_addch(sb, ' ');
418 strbuf_addstr(sb, find_unique_abbrev(
419 p->item->object.sha1, DEFAULT_ABBREV));
421 return;
422 case 'm': /* left/right/bottom */
423 strbuf_addch(sb, (commit->object.flags & BOUNDARY)
424 ? '-'
425 : (commit->object.flags & SYMMETRIC_LEFT)
426 ? '<'
427 : '>');
428 return;
431 /* For the rest we have to parse the commit header. */
432 for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
433 int eol;
434 for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
435 ; /* do nothing */
437 if (state == SUBJECT) {
438 if (placeholder[0] == 's') {
439 strbuf_add(sb, msg + i, eol - i);
440 return;
442 i = eol;
444 if (i == eol) {
445 state++;
446 /* strip empty lines */
447 while (msg[eol + 1] == '\n')
448 eol++;
449 } else if (!prefixcmp(msg + i, "author ")) {
450 if (placeholder[0] == 'a') {
451 format_person_part(sb, placeholder[1],
452 msg + i + 7, eol - i - 7);
453 return;
455 } else if (!prefixcmp(msg + i, "committer ")) {
456 if (placeholder[0] == 'c') {
457 format_person_part(sb, placeholder[1],
458 msg + i + 10, eol - i - 10);
459 return;
461 } else if (!prefixcmp(msg + i, "encoding ")) {
462 if (placeholder[0] == 'e') {
463 strbuf_add(sb, msg + i + 9, eol - i - 9);
464 return;
467 i = eol;
469 if (msg[i] && placeholder[0] == 'b') /* body */
470 strbuf_addstr(sb, msg + i);
473 void format_commit_message(const struct commit *commit,
474 const void *format, struct strbuf *sb)
476 const char *placeholders[] = {
477 "H", /* commit hash */
478 "h", /* abbreviated commit hash */
479 "T", /* tree hash */
480 "t", /* abbreviated tree hash */
481 "P", /* parent hashes */
482 "p", /* abbreviated parent hashes */
483 "an", /* author name */
484 "ae", /* author email */
485 "ad", /* author date */
486 "aD", /* author date, RFC2822 style */
487 "ar", /* author date, relative */
488 "at", /* author date, UNIX timestamp */
489 "ai", /* author date, ISO 8601 */
490 "cn", /* committer name */
491 "ce", /* committer email */
492 "cd", /* committer date */
493 "cD", /* committer date, RFC2822 style */
494 "cr", /* committer date, relative */
495 "ct", /* committer date, UNIX timestamp */
496 "ci", /* committer date, ISO 8601 */
497 "e", /* encoding */
498 "s", /* subject */
499 "b", /* body */
500 "Cred", /* red */
501 "Cgreen", /* green */
502 "Cblue", /* blue */
503 "Creset", /* reset color */
504 "n", /* newline */
505 "m", /* left/right/bottom */
506 NULL
508 strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
511 static void pp_header(enum cmit_fmt fmt,
512 int abbrev,
513 enum date_mode dmode,
514 const char *encoding,
515 const struct commit *commit,
516 const char **msg_p,
517 struct strbuf *sb)
519 int parents_shown = 0;
521 for (;;) {
522 const char *line = *msg_p;
523 int linelen = get_one_line(*msg_p);
525 if (!linelen)
526 return;
527 *msg_p += linelen;
529 if (linelen == 1)
530 /* End of header */
531 return;
533 if (fmt == CMIT_FMT_RAW) {
534 strbuf_add(sb, line, linelen);
535 continue;
538 if (!memcmp(line, "parent ", 7)) {
539 if (linelen != 48)
540 die("bad parent line in commit");
541 continue;
544 if (!parents_shown) {
545 struct commit_list *parent;
546 int num;
547 for (parent = commit->parents, num = 0;
548 parent;
549 parent = parent->next, num++)
551 /* with enough slop */
552 strbuf_grow(sb, num * 50 + 20);
553 add_merge_info(fmt, sb, commit, abbrev);
554 parents_shown = 1;
558 * MEDIUM == DEFAULT shows only author with dates.
559 * FULL shows both authors but not dates.
560 * FULLER shows both authors and dates.
562 if (!memcmp(line, "author ", 7)) {
563 strbuf_grow(sb, linelen + 80);
564 add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
566 if (!memcmp(line, "committer ", 10) &&
567 (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
568 strbuf_grow(sb, linelen + 80);
569 add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
574 static void pp_title_line(enum cmit_fmt fmt,
575 const char **msg_p,
576 struct strbuf *sb,
577 const char *subject,
578 const char *after_subject,
579 const char *encoding,
580 int plain_non_ascii)
582 struct strbuf title;
584 strbuf_init(&title, 80);
586 for (;;) {
587 const char *line = *msg_p;
588 int linelen = get_one_line(line);
590 *msg_p += linelen;
591 if (!linelen || is_empty_line(line, &linelen))
592 break;
594 strbuf_grow(&title, linelen + 2);
595 if (title.len) {
596 if (fmt == CMIT_FMT_EMAIL) {
597 strbuf_addch(&title, '\n');
599 strbuf_addch(&title, ' ');
601 strbuf_add(&title, line, linelen);
604 strbuf_grow(sb, title.len + 1024);
605 if (subject) {
606 strbuf_addstr(sb, subject);
607 add_rfc2047(sb, title.buf, title.len, encoding);
608 } else {
609 strbuf_addbuf(sb, &title);
611 strbuf_addch(sb, '\n');
613 if (plain_non_ascii) {
614 const char *header_fmt =
615 "MIME-Version: 1.0\n"
616 "Content-Type: text/plain; charset=%s\n"
617 "Content-Transfer-Encoding: 8bit\n";
618 strbuf_addf(sb, header_fmt, encoding);
620 if (after_subject) {
621 strbuf_addstr(sb, after_subject);
623 if (fmt == CMIT_FMT_EMAIL) {
624 strbuf_addch(sb, '\n');
626 strbuf_release(&title);
629 static void pp_remainder(enum cmit_fmt fmt,
630 const char **msg_p,
631 struct strbuf *sb,
632 int indent)
634 int first = 1;
635 for (;;) {
636 const char *line = *msg_p;
637 int linelen = get_one_line(line);
638 *msg_p += linelen;
640 if (!linelen)
641 break;
643 if (is_empty_line(line, &linelen)) {
644 if (first)
645 continue;
646 if (fmt == CMIT_FMT_SHORT)
647 break;
649 first = 0;
651 strbuf_grow(sb, linelen + indent + 20);
652 if (indent) {
653 memset(sb->buf + sb->len, ' ', indent);
654 strbuf_setlen(sb, sb->len + indent);
656 strbuf_add(sb, line, linelen);
657 strbuf_addch(sb, '\n');
661 void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
662 struct strbuf *sb, int abbrev,
663 const char *subject, const char *after_subject,
664 enum date_mode dmode, int plain_non_ascii)
666 unsigned long beginning_of_body;
667 int indent = 4;
668 const char *msg = commit->buffer;
669 char *reencoded;
670 const char *encoding;
672 if (fmt == CMIT_FMT_USERFORMAT) {
673 format_commit_message(commit, user_format, sb);
674 return;
677 encoding = (git_log_output_encoding
678 ? git_log_output_encoding
679 : git_commit_encoding);
680 if (!encoding)
681 encoding = "utf-8";
682 reencoded = logmsg_reencode(commit, encoding);
683 if (reencoded) {
684 msg = reencoded;
687 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
688 indent = 0;
690 /* After-subject is used to pass in Content-Type: multipart
691 * MIME header; in that case we do not have to do the
692 * plaintext content type even if the commit message has
693 * non 7-bit ASCII character. Otherwise, check if we need
694 * to say this is not a 7-bit ASCII.
696 if (fmt == CMIT_FMT_EMAIL && !after_subject) {
697 int i, ch, in_body;
699 for (in_body = i = 0; (ch = msg[i]); i++) {
700 if (!in_body) {
701 /* author could be non 7-bit ASCII but
702 * the log may be so; skip over the
703 * header part first.
705 if (ch == '\n' && msg[i+1] == '\n')
706 in_body = 1;
708 else if (non_ascii(ch)) {
709 plain_non_ascii = 1;
710 break;
715 pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
716 if (fmt != CMIT_FMT_ONELINE && !subject) {
717 strbuf_addch(sb, '\n');
720 /* Skip excess blank lines at the beginning of body, if any... */
721 for (;;) {
722 int linelen = get_one_line(msg);
723 int ll = linelen;
724 if (!linelen)
725 break;
726 if (!is_empty_line(msg, &ll))
727 break;
728 msg += linelen;
731 /* These formats treat the title line specially. */
732 if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
733 pp_title_line(fmt, &msg, sb, subject,
734 after_subject, encoding, plain_non_ascii);
736 beginning_of_body = sb->len;
737 if (fmt != CMIT_FMT_ONELINE)
738 pp_remainder(fmt, &msg, sb, indent);
739 strbuf_rtrim(sb);
741 /* Make sure there is an EOLN for the non-oneline case */
742 if (fmt != CMIT_FMT_ONELINE)
743 strbuf_addch(sb, '\n');
746 * The caller may append additional body text in e-mail
747 * format. Make sure we did not strip the blank line
748 * between the header and the body.
750 if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
751 strbuf_addch(sb, '\n');
752 free(reencoded);