pretty.c

   1 #include "cache.h"
   2 #include "alloc.h"
   3 #include "config.h"
   4 #include "commit.h"
   5 #include "hex.h"
   6 #include "utf8.h"
   7 #include "diff.h"
   8 #include "revision.h"
   9 #include "string-list.h"
  10 #include "mailmap.h"
  11 #include "log-tree.h"
  12 #include "notes.h"
  13 #include "color.h"
  14 #include "reflog-walk.h"
  15 #include "gpg-interface.h"
  16 #include "trailer.h"
  17 #include "run-command.h"
  18
  19 /*
  20  * The limit for formatting directives, which enable the caller to append
  21  * arbitrarily many bytes to the formatted buffer. This includes padding
  22  * and wrapping formatters.
  23  */
  24 #define FORMATTING_LIMIT (16 * 1024)
  25
  26 static char *user_format;
  27 static struct cmt_fmt_map {
  28         const char *name;
  29         enum cmit_fmt format;
  30         int is_tformat;
  31         int expand_tabs_in_log;
  32         int is_alias;
  33         enum date_mode_type default_date_mode_type;
  34         const char *user_format;
  35 } *commit_formats;
  36 static size_t builtin_formats_len;
  37 static size_t commit_formats_len;
  38 static size_t commit_formats_alloc;
  39 static struct cmt_fmt_map *find_commit_format(const char *sought);
  40
  41 int commit_format_is_empty(enum cmit_fmt fmt)
  42 {
  43         return fmt == CMIT_FMT_USERFORMAT && !*user_format;
  44 }
  45
  46 static void save_user_format(struct rev_info *rev, const char *cp, int is_tformat)
  47 {
  48         free(user_format);
  49         user_format = xstrdup(cp);
  50         if (is_tformat)
  51                 rev->use_terminator = 1;
  52         rev->commit_format = CMIT_FMT_USERFORMAT;
  53 }
  54
  55 static int git_pretty_formats_config(const char *var, const char *value,
  56                                      void *cb UNUSED)
  57 {
  58         struct cmt_fmt_map *commit_format = NULL;
  59         const char *name;
  60         const char *fmt;
  61         int i;
  62
  63         if (!skip_prefix(var, "pretty.", &name))
  64                 return 0;
  65
  66         for (i = 0; i < builtin_formats_len; i++) {
  67                 if (!strcmp(commit_formats[i].name, name))
  68                         return 0;
  69         }
  70
  71         for (i = builtin_formats_len; i < commit_formats_len; i++) {
  72                 if (!strcmp(commit_formats[i].name, name)) {
  73                         commit_format = &commit_formats[i];
  74                         break;
  75                 }
  76         }
  77
  78         if (!commit_format) {
  79                 ALLOC_GROW(commit_formats, commit_formats_len+1,
  80                            commit_formats_alloc);
  81                 commit_format = &commit_formats[commit_formats_len];
  82                 memset(commit_format, 0, sizeof(*commit_format));
  83                 commit_formats_len++;
  84         }
  85
  86         commit_format->name = xstrdup(name);
  87         commit_format->format = CMIT_FMT_USERFORMAT;
  88         if (git_config_string(&fmt, var, value))
  89                 return -1;
  90
  91         if (skip_prefix(fmt, "format:", &fmt))
  92                 commit_format->is_tformat = 0;
  93         else if (skip_prefix(fmt, "tformat:", &fmt) || strchr(fmt, '%'))
  94                 commit_format->is_tformat = 1;
  95         else
  96                 commit_format->is_alias = 1;
  97         commit_format->user_format = fmt;
  98
  99         return 0;
 100 }
 101
 102 static void setup_commit_formats(void)
 103 {
 104         struct cmt_fmt_map builtin_formats[] = {
 105                 { "raw",        CMIT_FMT_RAW,           0,      0 },
 106                 { "medium",     CMIT_FMT_MEDIUM,        0,      8 },
 107                 { "short",      CMIT_FMT_SHORT,         0,      0 },
 108                 { "email",      CMIT_FMT_EMAIL,         0,      0 },
 109                 { "mboxrd",     CMIT_FMT_MBOXRD,        0,      0 },
 110                 { "fuller",     CMIT_FMT_FULLER,        0,      8 },
 111                 { "full",       CMIT_FMT_FULL,          0,      8 },
 112                 { "oneline",    CMIT_FMT_ONELINE,       1,      0 },
 113                 { "reference",  CMIT_FMT_USERFORMAT,    1,      0,
 114                         0, DATE_SHORT, "%C(auto)%h (%s, %ad)" },
 115                 /*
 116                  * Please update $__git_log_pretty_formats in
 117                  * git-completion.bash when you add new formats.
 118                  */
 119         };
 120         commit_formats_len = ARRAY_SIZE(builtin_formats);
 121         builtin_formats_len = commit_formats_len;
 122         ALLOC_GROW(commit_formats, commit_formats_len, commit_formats_alloc);
 123         COPY_ARRAY(commit_formats, builtin_formats,
 124                    ARRAY_SIZE(builtin_formats));
 125
 126         git_config(git_pretty_formats_config, NULL);
 127 }
 128
 129 static struct cmt_fmt_map *find_commit_format_recursive(const char *sought,
 130                                                         const char *original,
 131                                                         int num_redirections)
 132 {
 133         struct cmt_fmt_map *found = NULL;
 134         size_t found_match_len = 0;
 135         int i;
 136
 137         if (num_redirections >= commit_formats_len)
 138                 die("invalid --pretty format: "
 139                     "'%s' references an alias which points to itself",
 140                     original);
 141
 142         for (i = 0; i < commit_formats_len; i++) {
 143                 size_t match_len;
 144
 145                 if (!starts_with(commit_formats[i].name, sought))
 146                         continue;
 147
 148                 match_len = strlen(commit_formats[i].name);
 149                 if (found == NULL || found_match_len > match_len) {
 150                         found = &commit_formats[i];
 151                         found_match_len = match_len;
 152                 }
 153         }
 154
 155         if (found && found->is_alias) {
 156                 found = find_commit_format_recursive(found->user_format,
 157                                                      original,
 158                                                      num_redirections+1);
 159         }
 160
 161         return found;
 162 }
 163
 164 static struct cmt_fmt_map *find_commit_format(const char *sought)
 165 {
 166         if (!commit_formats)
 167                 setup_commit_formats();
 168
 169         return find_commit_format_recursive(sought, sought, 0);
 170 }
 171
 172 void get_commit_format(const char *arg, struct rev_info *rev)
 173 {
 174         struct cmt_fmt_map *commit_format;
 175
 176         rev->use_terminator = 0;
 177         if (!arg) {
 178                 rev->commit_format = CMIT_FMT_DEFAULT;
 179                 return;
 180         }
 181         if (skip_prefix(arg, "format:", &arg)) {
 182                 save_user_format(rev, arg, 0);
 183                 return;
 184         }
 185
 186         if (!*arg || skip_prefix(arg, "tformat:", &arg) || strchr(arg, '%')) {
 187                 save_user_format(rev, arg, 1);
 188                 return;
 189         }
 190
 191         commit_format = find_commit_format(arg);
 192         if (!commit_format)
 193                 die("invalid --pretty format: %s", arg);
 194
 195         rev->commit_format = commit_format->format;
 196         rev->use_terminator = commit_format->is_tformat;
 197         rev->expand_tabs_in_log_default = commit_format->expand_tabs_in_log;
 198         if (!rev->date_mode_explicit && commit_format->default_date_mode_type)
 199                 rev->date_mode.type = commit_format->default_date_mode_type;
 200         if (commit_format->format == CMIT_FMT_USERFORMAT) {
 201                 save_user_format(rev, commit_format->user_format,
 202                                  commit_format->is_tformat);
 203         }
 204 }
 205
 206 /*
 207  * Generic support for pretty-printing the header
 208  */
 209 static int get_one_line(const char *msg)
 210 {
 211         int ret = 0;
 212
 213         for (;;) {
 214                 char c = *msg++;
 215                 if (!c)
 216                         break;
 217                 ret++;
 218                 if (c == '\n')
 219                         break;
 220         }
 221         return ret;
 222 }
 223
 224 /* High bit set, or ISO-2022-INT */
 225 static int non_ascii(int ch)
 226 {
 227         return !isascii(ch) || ch == '\033';
 228 }
 229
 230 int has_non_ascii(const char *s)
 231 {
 232         int ch;
 233         if (!s)
 234                 return 0;
 235         while ((ch = *s++) != '\0') {
 236                 if (non_ascii(ch))
 237                         return 1;
 238         }
 239         return 0;
 240 }
 241
 242 static int is_rfc822_special(char ch)
 243 {
 244         switch (ch) {
 245         case '(':
 246         case ')':
 247         case '<':
 248         case '>':
 249         case '[':
 250         case ']':
 251         case ':':
 252         case ';':
 253         case '@':
 254         case ',':
 255         case '.':
 256         case '"':
 257         case '\\':
 258                 return 1;
 259         default:
 260                 return 0;
 261         }
 262 }
 263
 264 static int needs_rfc822_quoting(const char *s, int len)
 265 {
 266         int i;
 267         for (i = 0; i < len; i++)
 268                 if (is_rfc822_special(s[i]))
 269                         return 1;
 270         return 0;
 271 }
 272
 273 static int last_line_length(struct strbuf *sb)
 274 {
 275         int i;
 276
 277         /* How many bytes are already used on the last line? */
 278         for (i = sb->len - 1; i >= 0; i--)
 279                 if (sb->buf[i] == '\n')
 280                         break;
 281         return sb->len - (i + 1);
 282 }
 283
 284 static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
 285 {
 286         int i;
 287
 288         /* just a guess, we may have to also backslash-quote */
 289         strbuf_grow(out, len + 2);
 290
 291         strbuf_addch(out, '"');
 292         for (i = 0; i < len; i++) {
 293                 switch (s[i]) {
 294                 case '"':
 295                 case '\\':
 296                         strbuf_addch(out, '\\');
 297                         /* fall through */
 298                 default:
 299                         strbuf_addch(out, s[i]);
 300                 }
 301         }
 302         strbuf_addch(out, '"');
 303 }
 304
 305 enum rfc2047_type {
 306         RFC2047_SUBJECT,
 307         RFC2047_ADDRESS
 308 };
 309
 310 static int is_rfc2047_special(char ch, enum rfc2047_type type)
 311 {
 312         /*
 313          * rfc2047, section 4.2:
 314          *
 315          *    8-bit values which correspond to printable ASCII characters other
 316          *    than "=", "?", and "_" (underscore), MAY be represented as those
 317          *    characters.  (But see section 5 for restrictions.)  In
 318          *    particular, SPACE and TAB MUST NOT be represented as themselves
 319          *    within encoded words.
 320          */
 321
 322         /*
 323          * rule out non-ASCII characters and non-printable characters (the
 324          * non-ASCII check should be redundant as isprint() is not localized
 325          * and only knows about ASCII, but be defensive about that)
 326          */
 327         if (non_ascii(ch) || !isprint(ch))
 328                 return 1;
 329
 330         /*
 331          * rule out special printable characters (' ' should be the only
 332          * whitespace character considered printable, but be defensive and use
 333          * isspace())
 334          */
 335         if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
 336                 return 1;
 337
 338         /*
 339          * rfc2047, section 5.3:
 340          *
 341          *    As a replacement for a 'word' entity within a 'phrase', for example,
 342          *    one that precedes an address in a From, To, or Cc header.  The ABNF
 343          *    definition for 'phrase' from RFC 822 thus becomes:
 344          *
 345          *    phrase = 1*( encoded-word / word )
 346          *
 347          *    In this case the set of characters that may be used in a "Q"-encoded
 348          *    'encoded-word' is restricted to: <upper and lower case ASCII
 349          *    letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
 350          *    (underscore, ASCII 95.)>.  An 'encoded-word' that appears within a
 351          *    'phrase' MUST be separated from any adjacent 'word', 'text' or
 352          *    'special' by 'linear-white-space'.
 353          */
 354
 355         if (type != RFC2047_ADDRESS)
 356                 return 0;
 357
 358         /* '=' and '_' are special cases and have been checked above */
 359         return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
 360 }
 361
 362 static int needs_rfc2047_encoding(const char *line, int len)
 363 {
 364         int i;
 365
 366         for (i = 0; i < len; i++) {
 367                 int ch = line[i];
 368                 if (non_ascii(ch) || ch == '\n')
 369                         return 1;
 370                 if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
 371                         return 1;
 372         }
 373
 374         return 0;
 375 }
 376
 377 static void add_rfc2047(struct strbuf *sb, const char *line, size_t len,
 378                        const char *encoding, enum rfc2047_type type)
 379 {
 380         static const int max_encoded_length = 76; /* per rfc2047 */
 381         int i;
 382         int line_len = last_line_length(sb);
 383
 384         strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
 385         strbuf_addf(sb, "=?%s?q?", encoding);
 386         line_len += strlen(encoding) + 5; /* 5 for =??q? */
 387
 388         while (len) {
 389                 /*
 390                  * RFC 2047, section 5 (3):
 391                  *
 392                  * Each 'encoded-word' MUST represent an integral number of
 393                  * characters.  A multi-octet character may not be split across
 394                  * adjacent 'encoded- word's.
 395                  */
 396                 const unsigned char *p = (const unsigned char *)line;
 397                 int chrlen = mbs_chrlen(&line, &len, encoding);
 398                 int is_special = (chrlen > 1) || is_rfc2047_special(*p, type);
 399
 400                 /* "=%02X" * chrlen, or the byte itself */
 401                 const char *encoded_fmt = is_special ? "=%02X"    : "%c";
 402                 int         encoded_len = is_special ? 3 * chrlen : 1;
 403
 404                 /*
 405                  * According to RFC 2047, we could encode the special character
 406                  * ' ' (space) with '_' (underscore) for readability. But many
 407                  * programs do not understand this and just leave the
 408                  * underscore in place. Thus, we do nothing special here, which
 409                  * causes ' ' to be encoded as '=20', avoiding this problem.
 410                  */
 411
 412                 if (line_len + encoded_len + 2 > max_encoded_length) {
 413                         /* It won't fit with trailing "?=" --- break the line */
 414                         strbuf_addf(sb, "?=\n =?%s?q?", encoding);
 415                         line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */
 416                 }
 417
 418                 for (i = 0; i < chrlen; i++)
 419                         strbuf_addf(sb, encoded_fmt, p[i]);
 420                 line_len += encoded_len;
 421         }
 422         strbuf_addstr(sb, "?=");
 423 }
 424
 425 const char *show_ident_date(const struct ident_split *ident,
 426                             const struct date_mode *mode)
 427 {
 428         timestamp_t date = 0;
 429         long tz = 0;
 430
 431         if (ident->date_begin && ident->date_end)
 432                 date = parse_timestamp(ident->date_begin, NULL, 10);
 433         if (date_overflows(date))
 434                 date = 0;
 435         else {
 436                 if (ident->tz_begin && ident->tz_end)
 437                         tz = strtol(ident->tz_begin, NULL, 10);
 438                 if (tz >= INT_MAX || tz <= INT_MIN)
 439                         tz = 0;
 440         }
 441         return show_date(date, tz, mode);
 442 }
 443
 444 static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
 445                                          const char *buf, size_t buflen)
 446 {
 447         strbuf_addstr(sb, color);
 448         strbuf_add(sb, buf, buflen);
 449         if (*color)
 450                 strbuf_addstr(sb, GIT_COLOR_RESET);
 451 }
 452
 453 static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
 454                                    const char *line, size_t linelen,
 455                                    int color, enum grep_context ctx,
 456                                    enum grep_header_field field)
 457 {
 458         const char *buf, *eol, *line_color, *match_color;
 459         regmatch_t match;
 460         int eflags = 0;
 461
 462         buf = line;
 463         eol = buf + linelen;
 464
 465         if (!opt || !want_color(color) || opt->invert)
 466                 goto end;
 467
 468         line_color = opt->colors[GREP_COLOR_SELECTED];
 469         match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
 470
 471         while (grep_next_match(opt, buf, eol, ctx, &match, field, eflags)) {
 472                 if (match.rm_so == match.rm_eo)
 473                         break;
 474
 475                 strbuf_add_with_color(sb, line_color, buf, match.rm_so);
 476                 strbuf_add_with_color(sb, match_color, buf + match.rm_so,
 477                                       match.rm_eo - match.rm_so);
 478                 buf += match.rm_eo;
 479                 eflags = REG_NOTBOL;
 480         }
 481
 482         if (eflags)
 483                 strbuf_add_with_color(sb, line_color, buf, eol - buf);
 484         else {
 485 end:
 486                 strbuf_add(sb, buf, eol - buf);
 487         }
 488 }
 489
 490 static int use_in_body_from(const struct pretty_print_context *pp,
 491                             const struct ident_split *ident)
 492 {
 493         if (pp->rev && pp->rev->force_in_body_from)
 494                 return 1;
 495         if (ident_cmp(pp->from_ident, ident))
 496                 return 1;
 497         return 0;
 498 }
 499
 500 void pp_user_info(struct pretty_print_context *pp,
 501                   const char *what, struct strbuf *sb,
 502                   const char *line, const char *encoding)
 503 {
 504         struct ident_split ident;
 505         char *line_end;
 506         const char *mailbuf, *namebuf;
 507         size_t namelen, maillen;
 508         int max_length = 78; /* per rfc2822 */
 509
 510         if (pp->fmt == CMIT_FMT_ONELINE)
 511                 return;
 512
 513         line_end = strchrnul(line, '\n');
 514         if (split_ident_line(&ident, line, line_end - line))
 515                 return;
 516
 517         mailbuf = ident.mail_begin;
 518         maillen = ident.mail_end - ident.mail_begin;
 519         namebuf = ident.name_begin;
 520         namelen = ident.name_end - ident.name_begin;
 521
 522         if (pp->mailmap)
 523                 map_user(pp->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
 524
 525         if (cmit_fmt_is_mail(pp->fmt)) {
 526                 if (pp->from_ident && use_in_body_from(pp, &ident)) {
 527                         struct strbuf buf = STRBUF_INIT;
 528
 529                         strbuf_addstr(&buf, "From: ");
 530                         strbuf_add(&buf, namebuf, namelen);
 531                         strbuf_addstr(&buf, " <");
 532                         strbuf_add(&buf, mailbuf, maillen);
 533                         strbuf_addstr(&buf, ">\n");
 534                         string_list_append(&pp->in_body_headers,
 535                                            strbuf_detach(&buf, NULL));
 536
 537                         mailbuf = pp->from_ident->mail_begin;
 538                         maillen = pp->from_ident->mail_end - mailbuf;
 539                         namebuf = pp->from_ident->name_begin;
 540                         namelen = pp->from_ident->name_end - namebuf;
 541                 }
 542
 543                 strbuf_addstr(sb, "From: ");
 544                 if (pp->encode_email_headers &&
 545                     needs_rfc2047_encoding(namebuf, namelen)) {
 546                         add_rfc2047(sb, namebuf, namelen,
 547                                     encoding, RFC2047_ADDRESS);
 548                         max_length = 76; /* per rfc2047 */
 549                 } else if (needs_rfc822_quoting(namebuf, namelen)) {
 550                         struct strbuf quoted = STRBUF_INIT;
 551                         add_rfc822_quoted(&quoted, namebuf, namelen);
 552                         strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
 553                                                         -6, 1, max_length);
 554                         strbuf_release(&quoted);
 555                 } else {
 556                         strbuf_add_wrapped_bytes(sb, namebuf, namelen,
 557                                                  -6, 1, max_length);
 558                 }
 559
 560                 if (max_length <
 561                     last_line_length(sb) + strlen(" <") + maillen + strlen(">"))
 562                         strbuf_addch(sb, '\n');
 563                 strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
 564         } else {
 565                 struct strbuf id = STRBUF_INIT;
 566                 enum grep_header_field field = GREP_HEADER_FIELD_MAX;
 567                 struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
 568
 569                 if (!strcmp(what, "Author"))
 570                         field = GREP_HEADER_AUTHOR;
 571                 else if (!strcmp(what, "Commit"))
 572                         field = GREP_HEADER_COMMITTER;
 573
 574                 strbuf_addf(sb, "%s: ", what);
 575                 if (pp->fmt == CMIT_FMT_FULLER)
 576                         strbuf_addchars(sb, ' ', 4);
 577
 578                 strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
 579                             (int)maillen, mailbuf);
 580
 581                 append_line_with_color(sb, opt, id.buf, id.len, pp->color,
 582                                        GREP_CONTEXT_HEAD, field);
 583                 strbuf_addch(sb, '\n');
 584                 strbuf_release(&id);
 585         }
 586
 587         switch (pp->fmt) {
 588         case CMIT_FMT_MEDIUM:
 589                 strbuf_addf(sb, "Date:   %s\n",
 590                             show_ident_date(&ident, &pp->date_mode));
 591                 break;
 592         case CMIT_FMT_EMAIL:
 593         case CMIT_FMT_MBOXRD:
 594                 strbuf_addf(sb, "Date: %s\n",
 595                             show_ident_date(&ident, DATE_MODE(RFC2822)));
 596                 break;
 597         case CMIT_FMT_FULLER:
 598                 strbuf_addf(sb, "%sDate: %s\n", what,
 599                             show_ident_date(&ident, &pp->date_mode));
 600                 break;
 601         default:
 602                 /* notin' */
 603                 break;
 604         }
 605 }
 606
 607 static int is_blank_line(const char *line, int *len_p)
 608 {
 609         int len = *len_p;
 610         while (len && isspace(line[len - 1]))
 611                 len--;
 612         *len_p = len;
 613         return !len;
 614 }
 615
 616 const char *skip_blank_lines(const char *msg)
 617 {
 618         for (;;) {
 619                 int linelen = get_one_line(msg);
 620                 int ll = linelen;
 621                 if (!linelen)
 622                         break;
 623                 if (!is_blank_line(msg, &ll))
 624                         break;
 625                 msg += linelen;
 626         }
 627         return msg;
 628 }
 629
 630 static void add_merge_info(const struct pretty_print_context *pp,
 631                            struct strbuf *sb, const struct commit *commit)
 632 {
 633         struct commit_list *parent = commit->parents;
 634
 635         if ((pp->fmt == CMIT_FMT_ONELINE) || (cmit_fmt_is_mail(pp->fmt)) ||
 636             !parent || !parent->next)
 637                 return;
 638
 639         strbuf_addstr(sb, "Merge:");
 640
 641         while (parent) {
 642                 struct object_id *oidp = &parent->item->object.oid;
 643                 strbuf_addch(sb, ' ');
 644                 if (pp->abbrev)
 645                         strbuf_add_unique_abbrev(sb, oidp, pp->abbrev);
 646                 else
 647                         strbuf_addstr(sb, oid_to_hex(oidp));
 648                 parent = parent->next;
 649         }
 650         strbuf_addch(sb, '\n');
 651 }
 652
 653 static char *get_header(const char *msg, const char *key)
 654 {
 655         size_t len;
 656         const char *v = find_commit_header(msg, key, &len);
 657         return v ? xmemdupz(v, len) : NULL;
 658 }
 659
 660 static char *replace_encoding_header(char *buf, const char *encoding)
 661 {
 662         struct strbuf tmp = STRBUF_INIT;
 663         size_t start, len;
 664         char *cp = buf;
 665
 666         /* guess if there is an encoding header before a \n\n */
 667         while (!starts_with(cp, "encoding ")) {
 668                 cp = strchr(cp, '\n');
 669                 if (!cp || *++cp == '\n')
 670                         return buf;
 671         }
 672         start = cp - buf;
 673         cp = strchr(cp, '\n');
 674         if (!cp)
 675                 return buf; /* should not happen but be defensive */
 676         len = cp + 1 - (buf + start);
 677
 678         strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
 679         if (is_encoding_utf8(encoding)) {
 680                 /* we have re-coded to UTF-8; drop the header */
 681                 strbuf_remove(&tmp, start, len);
 682         } else {
 683                 /* just replaces XXXX in 'encoding XXXX\n' */
 684                 strbuf_splice(&tmp, start + strlen("encoding "),
 685                                           len - strlen("encoding \n"),
 686                                           encoding, strlen(encoding));
 687         }
 688         return strbuf_detach(&tmp, NULL);
 689 }
 690
 691 const char *repo_logmsg_reencode(struct repository *r,
 692                                  const struct commit *commit,
 693                                  char **commit_encoding,
 694                                  const char *output_encoding)
 695 {
 696         static const char *utf8 = "UTF-8";
 697         const char *use_encoding;
 698         char *encoding;
 699         const char *msg = repo_get_commit_buffer(r, commit, NULL);
 700         char *out;
 701
 702         if (!output_encoding || !*output_encoding) {
 703                 if (commit_encoding)
 704                         *commit_encoding = get_header(msg, "encoding");
 705                 return msg;
 706         }
 707         encoding = get_header(msg, "encoding");
 708         if (commit_encoding)
 709                 *commit_encoding = encoding;
 710         use_encoding = encoding ? encoding : utf8;
 711         if (same_encoding(use_encoding, output_encoding)) {
 712                 /*
 713                  * No encoding work to be done. If we have no encoding header
 714                  * at all, then there's nothing to do, and we can return the
 715                  * message verbatim (whether newly allocated or not).
 716                  */
 717                 if (!encoding)
 718                         return msg;
 719
 720                 /*
 721                  * Otherwise, we still want to munge the encoding header in the
 722                  * result, which will be done by modifying the buffer. If we
 723                  * are using a fresh copy, we can reuse it. But if we are using
 724                  * the cached copy from get_commit_buffer, we need to duplicate it
 725                  * to avoid munging the cached copy.
 726                  */
 727                 if (msg == get_cached_commit_buffer(r, commit, NULL))
 728                         out = xstrdup(msg);
 729                 else
 730                         out = (char *)msg;
 731         }
 732         else {
 733                 /*
 734                  * There's actual encoding work to do. Do the reencoding, which
 735                  * still leaves the header to be replaced in the next step. At
 736                  * this point, we are done with msg. If we allocated a fresh
 737                  * copy, we can free it.
 738                  */
 739                 out = reencode_string(msg, output_encoding, use_encoding);
 740                 if (out)
 741                         repo_unuse_commit_buffer(r, commit, msg);
 742         }
 743
 744         /*
 745          * This replacement actually consumes the buffer we hand it, so we do
 746          * not have to worry about freeing the old "out" here.
 747          */
 748         if (out)
 749                 out = replace_encoding_header(out, output_encoding);
 750
 751         if (!commit_encoding)
 752                 free(encoding);
 753         /*
 754          * If the re-encoding failed, out might be NULL here; in that
 755          * case we just return the commit message verbatim.
 756          */
 757         return out ? out : msg;
 758 }
 759
 760 static int mailmap_name(const char **email, size_t *email_len,
 761                         const char **name, size_t *name_len)
 762 {
 763         static struct string_list *mail_map;
 764         if (!mail_map) {
 765                 CALLOC_ARRAY(mail_map, 1);
 766                 read_mailmap(mail_map);
 767         }
 768         return mail_map->nr && map_user(mail_map, email, email_len, name, name_len);
 769 }
 770
 771 static size_t format_person_part(struct strbuf *sb, char part,
 772                                  const char *msg, int len,
 773                                  const struct date_mode *dmode)
 774 {
 775         /* currently all placeholders have same length */
 776         const int placeholder_len = 2;
 777         struct ident_split s;
 778         const char *name, *mail;
 779         size_t maillen, namelen;
 780
 781         if (split_ident_line(&s, msg, len) < 0)
 782                 goto skip;
 783
 784         name = s.name_begin;
 785         namelen = s.name_end - s.name_begin;
 786         mail = s.mail_begin;
 787         maillen = s.mail_end - s.mail_begin;
 788
 789         if (part == 'N' || part == 'E' || part == 'L') /* mailmap lookup */
 790                 mailmap_name(&mail, &maillen, &name, &namelen);
 791         if (part == 'n' || part == 'N') {       /* name */
 792                 strbuf_add(sb, name, namelen);
 793                 return placeholder_len;
 794         }
 795         if (part == 'e' || part == 'E') {       /* email */
 796                 strbuf_add(sb, mail, maillen);
 797                 return placeholder_len;
 798         }
 799         if (part == 'l' || part == 'L') {       /* local-part */
 800                 const char *at = memchr(mail, '@', maillen);
 801                 if (at)
 802                         maillen = at - mail;
 803                 strbuf_add(sb, mail, maillen);
 804                 return placeholder_len;
 805         }
 806
 807         if (!s.date_begin)
 808                 goto skip;
 809
 810         if (part == 't') {      /* date, UNIX timestamp */
 811                 strbuf_add(sb, s.date_begin, s.date_end - s.date_begin);
 812                 return placeholder_len;
 813         }
 814
 815         switch (part) {
 816         case 'd':       /* date */
 817                 strbuf_addstr(sb, show_ident_date(&s, dmode));
 818                 return placeholder_len;
 819         case 'D':       /* date, RFC2822 style */
 820                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(RFC2822)));
 821                 return placeholder_len;
 822         case 'r':       /* date, relative */
 823                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(RELATIVE)));
 824                 return placeholder_len;
 825         case 'i':       /* date, ISO 8601-like */
 826                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(ISO8601)));
 827                 return placeholder_len;
 828         case 'I':       /* date, ISO 8601 strict */
 829                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(ISO8601_STRICT)));
 830                 return placeholder_len;
 831         case 'h':       /* date, human */
 832                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(HUMAN)));
 833                 return placeholder_len;
 834         case 's':
 835                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(SHORT)));
 836                 return placeholder_len;
 837         }
 838
 839 skip:
 840         /*
 841          * reading from either a bogus commit, or a reflog entry with
 842          * %gn, %ge, etc.; 'sb' cannot be updated, but we still need
 843          * to compute a valid return value.
 844          */
 845         if (part == 'n' || part == 'e' || part == 't' || part == 'd'
 846             || part == 'D' || part == 'r' || part == 'i')
 847                 return placeholder_len;
 848
 849         return 0; /* unknown placeholder */
 850 }
 851
/*
 * A byte range given as a start offset and a length.  parse_commit_header()
 * fills these in for the author and committer idents, with 'off' counted
 * from the start of the commit message.
 */
struct chunk {
        size_t off;
        size_t len;
};
 856
/*
 * Padding mode selected by parse_padding_placeholder() from the characters
 * that introduce a padding placeholder.
 */
enum flush_type {
        no_flush,
        flush_right,            /* introduced by "<" */
        flush_left,             /* introduced by ">" */
        flush_left_and_steal,   /* introduced by ">>" */
        flush_both              /* introduced by "><" */
};
 864
/*
 * Truncation mode selected by the optional second argument of a padding
 * placeholder, as parsed by parse_padding_placeholder().
 */
enum trunc_type {
        trunc_none,     /* no second argument given */
        trunc_left,     /* "ltrunc" */
        trunc_middle,   /* "mtrunc" */
        trunc_right     /* "trunc" */
};
 871
/*
 * Context handed to format_commit_one() through its 'void *context'
 * argument; it carries the commit being formatted plus the state that
 * placeholders leave behind for later placeholders.
 */
struct format_commit_context {
        struct repository *repository;
        const struct commit *commit;
        const struct pretty_print_context *pretty_ctx;
        /* set by parse_commit_header() once author/committer/message_off are valid */
        unsigned commit_header_parsed:1;
        /* set by parse_commit_message() once subject_off/body_off are valid */
        unsigned commit_message_parsed:1;
        struct signature_check signature_check;
        /* padding request recorded by parse_padding_placeholder() */
        enum flush_type flush_type;
        enum trunc_type truncate;
        const char *message;
        char *commit_encoding;
        /* wrapping parameters of the current %w(...); see rewrap_message_tail() */
        size_t width, indent1, indent2;
        /* want_color() result stored by %C(auto); reset to 0 by other %C colors */
        int auto_color;
        /* padding width; stored negated when the "|" (to-column) form was used */
        int padding;

        /* These offsets are relative to the start of the commit message. */
        struct chunk author;
        struct chunk committer;
        size_t message_off;
        size_t subject_off;
        size_t body_off;

        /* The following ones are relative to the result struct strbuf. */
        size_t wrap_start;
};
 897
 898 static void parse_commit_header(struct format_commit_context *context)
 899 {
 900         const char *msg = context->message;
 901         int i;
 902
 903         for (i = 0; msg[i]; i++) {
 904                 const char *name;
 905                 int eol;
 906                 for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
 907                         ; /* do nothing */
 908
 909                 if (i == eol) {
 910                         break;
 911                 } else if (skip_prefix(msg + i, "author ", &name)) {
 912                         context->author.off = name - msg;
 913                         context->author.len = msg + eol - name;
 914                 } else if (skip_prefix(msg + i, "committer ", &name)) {
 915                         context->committer.off = name - msg;
 916                         context->committer.len = msg + eol - name;
 917                 }
 918                 i = eol;
 919         }
 920         context->message_off = i;
 921         context->commit_header_parsed = 1;
 922 }
 923
 924 static int istitlechar(char c)
 925 {
 926         return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
 927                 (c >= '0' && c <= '9') || c == '.' || c == '_';
 928 }
 929
 930 void format_sanitized_subject(struct strbuf *sb, const char *msg, size_t len)
 931 {
 932         size_t trimlen;
 933         size_t start_len = sb->len;
 934         int space = 2;
 935         int i;
 936
 937         for (i = 0; i < len; i++) {
 938                 if (istitlechar(msg[i])) {
 939                         if (space == 1)
 940                                 strbuf_addch(sb, '-');
 941                         space = 0;
 942                         strbuf_addch(sb, msg[i]);
 943                         if (msg[i] == '.')
 944                                 while (msg[i+1] == '.')
 945                                         i++;
 946                 } else
 947                         space |= 1;
 948         }
 949
 950         /* trim any trailing '.' or '-' characters */
 951         trimlen = 0;
 952         while (sb->len - trimlen > start_len &&
 953                 (sb->buf[sb->len - 1 - trimlen] == '.'
 954                 || sb->buf[sb->len - 1 - trimlen] == '-'))
 955                 trimlen++;
 956         strbuf_remove(sb, sb->len - trimlen, trimlen);
 957 }
 958
 959 const char *format_subject(struct strbuf *sb, const char *msg,
 960                            const char *line_separator)
 961 {
 962         int first = 1;
 963
 964         for (;;) {
 965                 const char *line = msg;
 966                 int linelen = get_one_line(line);
 967
 968                 msg += linelen;
 969                 if (!linelen || is_blank_line(line, &linelen))
 970                         break;
 971
 972                 if (!sb)
 973                         continue;
 974                 strbuf_grow(sb, linelen + 2);
 975                 if (!first)
 976                         strbuf_addstr(sb, line_separator);
 977                 strbuf_add(sb, line, linelen);
 978                 first = 0;
 979         }
 980         return msg;
 981 }
 982
 983 static void parse_commit_message(struct format_commit_context *c)
 984 {
 985         const char *msg = c->message + c->message_off;
 986         const char *start = c->message;
 987
 988         msg = skip_blank_lines(msg);
 989         c->subject_off = msg - start;
 990
 991         msg = format_subject(NULL, msg, NULL);
 992         msg = skip_blank_lines(msg);
 993         c->body_off = msg - start;
 994
 995         c->commit_message_parsed = 1;
 996 }
 997
 998 static void strbuf_wrap(struct strbuf *sb, size_t pos,
 999                         size_t width, size_t indent1, size_t indent2)
1000 {
1001         struct strbuf tmp = STRBUF_INIT;
1002
1003         if (pos)
1004                 strbuf_add(&tmp, sb->buf, pos);
1005         strbuf_add_wrapped_text(&tmp, sb->buf + pos,
1006                                 cast_size_t_to_int(indent1),
1007                                 cast_size_t_to_int(indent2),
1008                                 cast_size_t_to_int(width));
1009         strbuf_swap(&tmp, sb);
1010         strbuf_release(&tmp);
1011 }
1012
1013 static void rewrap_message_tail(struct strbuf *sb,
1014                                 struct format_commit_context *c,
1015                                 size_t new_width, size_t new_indent1,
1016                                 size_t new_indent2)
1017 {
1018         if (c->width == new_width && c->indent1 == new_indent1 &&
1019             c->indent2 == new_indent2)
1020                 return;
1021         if (c->wrap_start < sb->len)
1022                 strbuf_wrap(sb, c->wrap_start, c->width, c->indent1, c->indent2);
1023         c->wrap_start = sb->len;
1024         c->width = new_width;
1025         c->indent1 = new_indent1;
1026         c->indent2 = new_indent2;
1027 }
1028
/*
 * Expand a person placeholder from the ident returned by
 * get_reflog_ident().  Without reflog info, or without an ident, nothing
 * is appended and 2 is returned; otherwise the result of
 * format_person_part() is passed through.
 */
static int format_reflog_person(struct strbuf *sb,
                                char part,
                                struct reflog_walk_info *log,
                                const struct date_mode *dmode)
{
        const char *ident = log ? get_reflog_ident(log) : NULL;

        if (!ident)
                return 2;

        return format_person_part(sb, part, ident, strlen(ident), dmode);
}
1045
1046 static size_t parse_color(struct strbuf *sb, /* in UTF-8 */
1047                           const char *placeholder,
1048                           struct format_commit_context *c)
1049 {
1050         const char *rest = placeholder;
1051         const char *basic_color = NULL;
1052
1053         if (placeholder[1] == '(') {
1054                 const char *begin = placeholder + 2;
1055                 const char *end = strchr(begin, ')');
1056                 char color[COLOR_MAXLEN];
1057
1058                 if (!end)
1059                         return 0;
1060
1061                 if (skip_prefix(begin, "auto,", &begin)) {
1062                         if (!want_color(c->pretty_ctx->color))
1063                                 return end - placeholder + 1;
1064                 } else if (skip_prefix(begin, "always,", &begin)) {
1065                         /* nothing to do; we do not respect want_color at all */
1066                 } else {
1067                         /* the default is the same as "auto" */
1068                         if (!want_color(c->pretty_ctx->color))
1069                                 return end - placeholder + 1;
1070                 }
1071
1072                 if (color_parse_mem(begin, end - begin, color) < 0)
1073                         die(_("unable to parse --pretty format"));
1074                 strbuf_addstr(sb, color);
1075                 return end - placeholder + 1;
1076         }
1077
1078         /*
1079          * We handle things like "%C(red)" above; for historical reasons, there
1080          * are a few colors that can be specified without parentheses (and
1081          * they cannot support things like "auto" or "always" at all).
1082          */
1083         if (skip_prefix(placeholder + 1, "red", &rest))
1084                 basic_color = GIT_COLOR_RED;
1085         else if (skip_prefix(placeholder + 1, "green", &rest))
1086                 basic_color = GIT_COLOR_GREEN;
1087         else if (skip_prefix(placeholder + 1, "blue", &rest))
1088                 basic_color = GIT_COLOR_BLUE;
1089         else if (skip_prefix(placeholder + 1, "reset", &rest))
1090                 basic_color = GIT_COLOR_RESET;
1091
1092         if (basic_color && want_color(c->pretty_ctx->color))
1093                 strbuf_addstr(sb, basic_color);
1094
1095         return rest - placeholder;
1096 }
1097
1098 static size_t parse_padding_placeholder(const char *placeholder,
1099                                         struct format_commit_context *c)
1100 {
1101         const char *ch = placeholder;
1102         enum flush_type flush_type;
1103         int to_column = 0;
1104
1105         switch (*ch++) {
1106         case '<':
1107                 flush_type = flush_right;
1108                 break;
1109         case '>':
1110                 if (*ch == '<') {
1111                         flush_type = flush_both;
1112                         ch++;
1113                 } else if (*ch == '>') {
1114                         flush_type = flush_left_and_steal;
1115                         ch++;
1116                 } else
1117                         flush_type = flush_left;
1118                 break;
1119         default:
1120                 return 0;
1121         }
1122
1123         /* the next value means "wide enough to that column" */
1124         if (*ch == '|') {
1125                 to_column = 1;
1126                 ch++;
1127         }
1128
1129         if (*ch == '(') {
1130                 const char *start = ch + 1;
1131                 const char *end = start + strcspn(start, ",)");
1132                 char *next;
1133                 int width;
1134                 if (!*end || end == start)
1135                         return 0;
1136                 width = strtol(start, &next, 10);
1137
1138                 /*
1139                  * We need to limit the amount of padding, or otherwise this
1140                  * would allow the user to pad the buffer by arbitrarily many
1141                  * bytes and thus cause resource exhaustion.
1142                  */
1143                 if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT)
1144                         return 0;
1145
1146                 if (next == start || width == 0)
1147                         return 0;
1148                 if (width < 0) {
1149                         if (to_column)
1150                                 width += term_columns();
1151                         if (width < 0)
1152                                 return 0;
1153                 }
1154                 c->padding = to_column ? -width : width;
1155                 c->flush_type = flush_type;
1156
1157                 if (*end == ',') {
1158                         start = end + 1;
1159                         end = strchr(start, ')');
1160                         if (!end || end == start)
1161                                 return 0;
1162                         if (starts_with(start, "trunc)"))
1163                                 c->truncate = trunc_right;
1164                         else if (starts_with(start, "ltrunc)"))
1165                                 c->truncate = trunc_left;
1166                         else if (starts_with(start, "mtrunc)"))
1167                                 c->truncate = trunc_middle;
1168                         else
1169                                 return 0;
1170                 } else
1171                         c->truncate = trunc_none;
1172
1173                 return end - placeholder + 1;
1174         }
1175         return 0;
1176 }
1177
/*
 * Try to match one placeholder argument named 'candidate' at 'to_parse'.
 *
 * With a non-NULL 'valuestart' the argument may carry "=<value>"; the
 * value runs up to the next ',' or ')' and is reported through
 * *valuestart / *valuelen.  A bare argument reports a NULL value of
 * length 0.  With a NULL 'valuestart' no value is looked for.
 *
 * The argument must be followed by ',' (then *end is set just past it) or
 * by ')' (then *end is set to point at it).  Returns 1 on a match and 0
 * otherwise; *end is only written on a match.
 */
static int match_placeholder_arg_value(const char *to_parse, const char *candidate,
                                       const char **end, const char **valuestart,
                                       size_t *valuelen)
{
        const char *cursor;

        if (!skip_prefix(to_parse, candidate, &cursor))
                return 0;

        if (valuestart) {
                if (*cursor == '=') {
                        const char *value = cursor + 1;
                        size_t n = strcspn(value, ",)");

                        *valuestart = value;
                        *valuelen = n;
                        cursor = value + n;
                } else if (*cursor == ',' || *cursor == ')') {
                        *valuestart = NULL;
                        *valuelen = 0;
                } else {
                        /* 'candidate' is only a prefix of a longer name */
                        return 0;
                }
        }

        switch (*cursor) {
        case ',':
                *end = cursor + 1;
                return 1;
        case ')':
                *end = cursor;
                return 1;
        default:
                return 0;
        }
}
1208
/*
 * Try to match a boolean placeholder argument named 'candidate' at
 * 'to_parse'.
 *
 * A bare argument means true; "<candidate>=<value>" takes whatever
 * git_parse_maybe_bool() makes of <value>.
 *
 * Returns 1 and sets *val and *end on success.  Returns 0 when the
 * argument does not match or its value is not a boolean; in that case
 * neither *val nor *end is modified, so the caller can go on trying other
 * candidates (or report the offending argument) from the same position.
 */
static int match_placeholder_bool_arg(const char *to_parse, const char *candidate,
                                      const char **end, int *val)
{
        const char *argval;
        const char *arg_end;
        size_t arglen;
        int v = 1;

        /*
         * Parse into a local end pointer: callers usually pass the same
         * variable for 'to_parse' and '*end', and it must not advance
         * unless the value turns out to be valid.
         */
        if (!match_placeholder_arg_value(to_parse, candidate, &arg_end, &argval, &arglen))
                return 0;

        if (argval) {
                char *strval = xstrndup(argval, arglen);

                v = git_parse_maybe_bool(strval);
                free(strval);

                if (v == -1)
                        return 0;
        }

        *end = arg_end;
        *val = v;

        return 1;
}
1236
1237 static int format_trailer_match_cb(const struct strbuf *key, void *ud)
1238 {
1239         const struct string_list *list = ud;
1240         const struct string_list_item *item;
1241
1242         for_each_string_list_item (item, list) {
1243                 if (key->len == (uintptr_t)item->util &&
1244                     !strncasecmp(item->string, key->buf, key->len))
1245                         return 1;
1246         }
1247         return 0;
1248 }
1249
1250 int format_set_trailers_options(struct process_trailer_options *opts,
1251                                 struct string_list *filter_list,
1252                                 struct strbuf *sepbuf,
1253                                 struct strbuf *kvsepbuf,
1254                                 const char **arg,
1255                                 char **invalid_arg)
1256 {
1257         for (;;) {
1258                 const char *argval;
1259                 size_t arglen;
1260
1261                 if (**arg == ')')
1262                         break;
1263
1264                 if (match_placeholder_arg_value(*arg, "key", arg, &argval, &arglen)) {
1265                         uintptr_t len = arglen;
1266
1267                         if (!argval)
1268                                 return -1;
1269
1270                         if (len && argval[len - 1] == ':')
1271                                 len--;
1272                         string_list_append(filter_list, argval)->util = (char *)len;
1273
1274                         opts->filter = format_trailer_match_cb;
1275                         opts->filter_data = filter_list;
1276                         opts->only_trailers = 1;
1277                 } else if (match_placeholder_arg_value(*arg, "separator", arg, &argval, &arglen)) {
1278                         char *fmt;
1279
1280                         strbuf_reset(sepbuf);
1281                         fmt = xstrndup(argval, arglen);
1282                         strbuf_expand(sepbuf, fmt, strbuf_expand_literal_cb, NULL);
1283                         free(fmt);
1284                         opts->separator = sepbuf;
1285                 } else if (match_placeholder_arg_value(*arg, "key_value_separator", arg, &argval, &arglen)) {
1286                         char *fmt;
1287
1288                         strbuf_reset(kvsepbuf);
1289                         fmt = xstrndup(argval, arglen);
1290                         strbuf_expand(kvsepbuf, fmt, strbuf_expand_literal_cb, NULL);
1291                         free(fmt);
1292                         opts->key_value_separator = kvsepbuf;
1293                 } else if (!match_placeholder_bool_arg(*arg, "only", arg, &opts->only_trailers) &&
1294                            !match_placeholder_bool_arg(*arg, "unfold", arg, &opts->unfold) &&
1295                            !match_placeholder_bool_arg(*arg, "keyonly", arg, &opts->key_only) &&
1296                            !match_placeholder_bool_arg(*arg, "valueonly", arg, &opts->value_only)) {
1297                         if (invalid_arg) {
1298                                 size_t len = strcspn(*arg, ",)");
1299                                 *invalid_arg = xstrndup(*arg, len);
1300                         }
1301                         return -1;
1302                 }
1303         }
1304         return 0;
1305 }
1306
/*
 * Parse the argument list of a describe placeholder starting at 'start'
 * and push the matching command-line options onto 'args':
 *
 *   tags[=<bool>]      "--tags" or "--no-tags"
 *   abbrev=<integer>   "--abbrev=<integer>"
 *   exclude=<string>   "--exclude=<string>"
 *   match=<string>     "--match=<string>"
 *
 * Parsing stops at the first position where none of these match.  Returns
 * the number of bytes consumed, or 0 when "abbrev", "exclude" or "match"
 * comes with an empty value or "abbrev" is not entirely numeric.
 */
static size_t parse_describe_args(const char *start, struct strvec *args)
{
        struct {
                const char *name;
                enum {
                        DESCRIBE_ARG_BOOL,
                        DESCRIBE_ARG_INTEGER,
                        DESCRIBE_ARG_STRING,
                } type;
        } option[] = {
                { "tags", DESCRIBE_ARG_BOOL},
                { "abbrev", DESCRIBE_ARG_INTEGER },
                { "exclude", DESCRIBE_ARG_STRING },
                { "match", DESCRIBE_ARG_STRING },
        };
        const char *arg = start;
        int matched;

        do {
                int i;

                matched = 0;
                for (i = 0; !matched && i < ARRAY_SIZE(option); i++) {
                        const char *name = option[i].name;
                        const char *argval;
                        size_t arglen = 0;

                        if (option[i].type == DESCRIBE_ARG_BOOL) {
                                int optval = 0;

                                if (!match_placeholder_bool_arg(arg, name, &arg, &optval))
                                        continue;
                                if (optval)
                                        strvec_pushf(args, "--%s", name);
                                else
                                        strvec_pushf(args, "--no-%s", name);
                                matched = 1;
                                continue;
                        }

                        /* integer and string options both need a value */
                        if (!match_placeholder_arg_value(arg, name, &arg,
                                                         &argval, &arglen))
                                continue;
                        if (!arglen)
                                return 0;
                        if (option[i].type == DESCRIBE_ARG_INTEGER) {
                                char *endptr;

                                /* only checking that the whole value is numeric */
                                strtol(argval, &endptr, 10);
                                if (endptr - argval != arglen)
                                        return 0;
                        }
                        strvec_pushf(args, "--%s=%.*s", name, (int)arglen, argval);
                        matched = 1;
                }
        } while (matched);

        return arg - start;
}
1372
1373 static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
1374                                 const char *placeholder,
1375                                 void *context)
1376 {
1377         struct format_commit_context *c = context;
1378         const struct commit *commit = c->commit;
1379         const char *msg = c->message;
1380         struct commit_list *p;
1381         const char *arg, *eol;
1382         size_t res;
1383         char **slot;
1384
1385         /* these are independent of the commit */
1386         res = strbuf_expand_literal_cb(sb, placeholder, NULL);
1387         if (res)
1388                 return res;
1389
1390         switch (placeholder[0]) {
1391         case 'C':
1392                 if (starts_with(placeholder + 1, "(auto)")) {
1393                         c->auto_color = want_color(c->pretty_ctx->color);
1394                         if (c->auto_color && sb->len)
1395                                 strbuf_addstr(sb, GIT_COLOR_RESET);
1396                         return 7; /* consumed 7 bytes, "C(auto)" */
1397                 } else {
1398                         int ret = parse_color(sb, placeholder, c);
1399                         if (ret)
1400                                 c->auto_color = 0;
1401                         /*
1402                          * Otherwise, we decided to treat %C<unknown>
1403                          * as a literal string, and the previous
1404                          * %C(auto) is still valid.
1405                          */
1406                         return ret;
1407                 }
1408         case 'w':
1409                 if (placeholder[1] == '(') {
1410                         unsigned long width = 0, indent1 = 0, indent2 = 0;
1411                         char *next;
1412                         const char *start = placeholder + 2;
1413                         const char *end = strchr(start, ')');
1414                         if (!end)
1415                                 return 0;
1416                         if (end > start) {
1417                                 width = strtoul(start, &next, 10);
1418                                 if (*next == ',') {
1419                                         indent1 = strtoul(next + 1, &next, 10);
1420                                         if (*next == ',') {
1421                                                 indent2 = strtoul(next + 1,
1422                                                                  &next, 10);
1423                                         }
1424                                 }
1425                                 if (*next != ')')
1426                                         return 0;
1427                         }
1428
1429                         /*
1430                          * We need to limit the format here as it allows the
1431                          * user to prepend arbitrarily many bytes to the buffer
1432                          * when rewrapping.
1433                          */
1434                         if (width > FORMATTING_LIMIT ||
1435                             indent1 > FORMATTING_LIMIT ||
1436                             indent2 > FORMATTING_LIMIT)
1437                                 return 0;
1438                         rewrap_message_tail(sb, c, width, indent1, indent2);
1439                         return end - placeholder + 1;
1440                 } else
1441                         return 0;
1442
1443         case '<':
1444         case '>':
1445                 return parse_padding_placeholder(placeholder, c);
1446         }
1447
1448         if (skip_prefix(placeholder, "(describe", &arg)) {
1449                 struct child_process cmd = CHILD_PROCESS_INIT;
1450                 struct strbuf out = STRBUF_INIT;
1451                 struct strbuf err = STRBUF_INIT;
1452                 struct pretty_print_describe_status *describe_status;
1453
1454                 describe_status = c->pretty_ctx->describe_status;
1455                 if (describe_status) {
1456                         if (!describe_status->max_invocations)
1457                                 return 0;
1458                         describe_status->max_invocations--;
1459                 }
1460
1461                 cmd.git_cmd = 1;
1462                 strvec_push(&cmd.args, "describe");
1463
1464                 if (*arg == ':') {
1465                         arg++;
1466                         arg += parse_describe_args(arg, &cmd.args);
1467                 }
1468
1469                 if (*arg != ')') {
1470                         child_process_clear(&cmd);
1471                         return 0;
1472                 }
1473
1474                 strvec_push(&cmd.args, oid_to_hex(&commit->object.oid));
1475                 pipe_command(&cmd, NULL, 0, &out, 0, &err, 0);
1476                 strbuf_rtrim(&out);
1477                 strbuf_addbuf(sb, &out);
1478                 strbuf_release(&out);
1479                 strbuf_release(&err);
1480                 return arg - placeholder + 1;
1481         }
1482
1483         /* these depend on the commit */
1484         if (!commit->object.parsed)
1485                 parse_object(the_repository, &commit->object.oid);
1486
1487         switch (placeholder[0]) {
1488         case 'H':               /* commit hash */
1489                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
1490                 strbuf_addstr(sb, oid_to_hex(&commit->object.oid));
1491                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
1492                 return 1;
1493         case 'h':               /* abbreviated commit hash */
1494                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
1495                 strbuf_add_unique_abbrev(sb, &commit->object.oid,
1496                                          c->pretty_ctx->abbrev);
1497                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
1498                 return 1;
1499         case 'T':               /* tree hash */
1500                 strbuf_addstr(sb, oid_to_hex(get_commit_tree_oid(commit)));
1501                 return 1;
1502         case 't':               /* abbreviated tree hash */
1503                 strbuf_add_unique_abbrev(sb,
1504                                          get_commit_tree_oid(commit),
1505                                          c->pretty_ctx->abbrev);
1506                 return 1;
1507         case 'P':               /* parent hashes */
1508                 for (p = commit->parents; p; p = p->next) {
1509                         if (p != commit->parents)
1510                                 strbuf_addch(sb, ' ');
1511                         strbuf_addstr(sb, oid_to_hex(&p->item->object.oid));
1512                 }
1513                 return 1;
1514         case 'p':               /* abbreviated parent hashes */
1515                 for (p = commit->parents; p; p = p->next) {
1516                         if (p != commit->parents)
1517                                 strbuf_addch(sb, ' ');
1518                         strbuf_add_unique_abbrev(sb, &p->item->object.oid,
1519                                                  c->pretty_ctx->abbrev);
1520                 }
1521                 return 1;
1522         case 'm':               /* left/right/bottom */
1523                 strbuf_addstr(sb, get_revision_mark(NULL, commit));
1524                 return 1;
1525         case 'd':
1526                 format_decorations(sb, commit, c->auto_color);
1527                 return 1;
1528         case 'D':
1529                 format_decorations_extended(sb, commit, c->auto_color, "", ", ", "");
1530                 return 1;
1531         case 'S':               /* tag/branch like --source */
1532                 if (!(c->pretty_ctx->rev && c->pretty_ctx->rev->sources))
1533                         return 0;
1534                 slot = revision_sources_at(c->pretty_ctx->rev->sources, commit);
1535                 if (!(slot && *slot))
1536                         return 0;
1537                 strbuf_addstr(sb, *slot);
1538                 return 1;
1539         case 'g':               /* reflog info */
1540                 switch(placeholder[1]) {
1541                 case 'd':       /* reflog selector */
1542                 case 'D':
1543                         if (c->pretty_ctx->reflog_info)
1544                                 get_reflog_selector(sb,
1545                                                     c->pretty_ctx->reflog_info,
1546                                                     &c->pretty_ctx->date_mode,
1547                                                     c->pretty_ctx->date_mode_explicit,
1548                                                     (placeholder[1] == 'd'));
1549                         return 2;
1550                 case 's':       /* reflog message */
1551                         if (c->pretty_ctx->reflog_info)
1552                                 get_reflog_message(sb, c->pretty_ctx->reflog_info);
1553                         return 2;
1554                 case 'n':
1555                 case 'N':
1556                 case 'e':
1557                 case 'E':
1558                         return format_reflog_person(sb,
1559                                                     placeholder[1],
1560                                                     c->pretty_ctx->reflog_info,
1561                                                     &c->pretty_ctx->date_mode);
1562                 }
1563                 return 0;       /* unknown %g placeholder */
1564         case 'N':
1565                 if (c->pretty_ctx->notes_message) {
1566                         strbuf_addstr(sb, c->pretty_ctx->notes_message);
1567                         return 1;
1568                 }
1569                 return 0;
1570         }
1571
1572         if (placeholder[0] == 'G') {
1573                 if (!c->signature_check.result)
1574                         check_commit_signature(c->commit, &(c->signature_check));
1575                 switch (placeholder[1]) {
1576                 case 'G':
1577                         if (c->signature_check.output)
1578                                 strbuf_addstr(sb, c->signature_check.output);
1579                         break;
1580                 case '?':
1581                         switch (c->signature_check.result) {
1582                         case 'G':
1583                                 switch (c->signature_check.trust_level) {
1584                                 case TRUST_UNDEFINED:
1585                                 case TRUST_NEVER:
1586                                         strbuf_addch(sb, 'U');
1587                                         break;
1588                                 default:
1589                                         strbuf_addch(sb, 'G');
1590                                         break;
1591                                 }
1592                                 break;
1593                         case 'B':
1594                         case 'E':
1595                         case 'N':
1596                         case 'X':
1597                         case 'Y':
1598                         case 'R':
1599                                 strbuf_addch(sb, c->signature_check.result);
1600                         }
1601                         break;
1602                 case 'S':
1603                         if (c->signature_check.signer)
1604                                 strbuf_addstr(sb, c->signature_check.signer);
1605                         break;
1606                 case 'K':
1607                         if (c->signature_check.key)
1608                                 strbuf_addstr(sb, c->signature_check.key);
1609                         break;
1610                 case 'F':
1611                         if (c->signature_check.fingerprint)
1612                                 strbuf_addstr(sb, c->signature_check.fingerprint);
1613                         break;
1614                 case 'P':
1615                         if (c->signature_check.primary_key_fingerprint)
1616                                 strbuf_addstr(sb, c->signature_check.primary_key_fingerprint);
1617                         break;
1618                 case 'T':
1619                         strbuf_addstr(sb, gpg_trust_level_to_str(c->signature_check.trust_level));
1620                         break;
1621                 default:
1622                         return 0;
1623                 }
1624                 return 2;
1625         }
1626
1627         /* For the rest we have to parse the commit header. */
1628         if (!c->commit_header_parsed) {
1629                 msg = c->message =
1630                         repo_logmsg_reencode(c->repository, commit,
1631                                              &c->commit_encoding, "UTF-8");
1632                 parse_commit_header(c);
1633         }
1634
1635         switch (placeholder[0]) {
1636         case 'a':       /* author ... */
1637                 return format_person_part(sb, placeholder[1],
1638                                    msg + c->author.off, c->author.len,
1639                                    &c->pretty_ctx->date_mode);
1640         case 'c':       /* committer ... */
1641                 return format_person_part(sb, placeholder[1],
1642                                    msg + c->committer.off, c->committer.len,
1643                                    &c->pretty_ctx->date_mode);
1644         case 'e':       /* encoding */
1645                 if (c->commit_encoding)
1646                         strbuf_addstr(sb, c->commit_encoding);
1647                 return 1;
1648         case 'B':       /* raw body */
1649                 /* message_off is always left at the initial newline */
1650                 strbuf_addstr(sb, msg + c->message_off + 1);
1651                 return 1;
1652         }
1653
1654         /* Now we need to parse the commit message. */
1655         if (!c->commit_message_parsed)
1656                 parse_commit_message(c);
1657
1658         switch (placeholder[0]) {
1659         case 's':       /* subject */
1660                 format_subject(sb, msg + c->subject_off, " ");
1661                 return 1;
1662         case 'f':       /* sanitized subject */
1663                 eol = strchrnul(msg + c->subject_off, '\n');
1664                 format_sanitized_subject(sb, msg + c->subject_off, eol - (msg + c->subject_off));
1665                 return 1;
1666         case 'b':       /* body */
1667                 strbuf_addstr(sb, msg + c->body_off);
1668                 return 1;
1669         }
1670
1671         if (skip_prefix(placeholder, "(trailers", &arg)) {
1672                 struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
1673                 struct string_list filter_list = STRING_LIST_INIT_NODUP;
1674                 struct strbuf sepbuf = STRBUF_INIT;
1675                 struct strbuf kvsepbuf = STRBUF_INIT;
1676                 size_t ret = 0;
1677
1678                 opts.no_divider = 1;
1679
1680                 if (*arg == ':') {
1681                         arg++;
1682                         if (format_set_trailers_options(&opts, &filter_list, &sepbuf, &kvsepbuf, &arg, NULL))
1683                                 goto trailer_out;
1684                 }
1685                 if (*arg == ')') {
1686                         format_trailers_from_commit(sb, msg + c->subject_off, &opts);
1687                         ret = arg - placeholder + 1;
1688                 }
1689         trailer_out:
1690                 string_list_clear(&filter_list, 0);
1691                 strbuf_release(&sepbuf);
1692                 return ret;
1693         }
1694
1695         return 0;       /* unknown placeholder */
1696 }
1697
1698 static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
1699                                     const char *placeholder,
1700                                     struct format_commit_context *c)
1701 {
1702         struct strbuf local_sb = STRBUF_INIT;
1703         size_t total_consumed = 0;
1704         int len, padding = c->padding;
1705
1706         if (padding < 0) {
1707                 const char *start = strrchr(sb->buf, '\n');
1708                 int occupied;
1709                 if (!start)
1710                         start = sb->buf;
1711                 occupied = utf8_strnwidth(start, strlen(start), 1);
1712                 occupied += c->pretty_ctx->graph_width;
1713                 padding = (-padding) - occupied;
1714         }
1715         while (1) {
1716                 int modifier = *placeholder == 'C';
1717                 size_t consumed = format_commit_one(&local_sb, placeholder, c);
1718                 total_consumed += consumed;
1719
1720                 if (!modifier)
1721                         break;
1722
1723                 placeholder += consumed;
1724                 if (*placeholder != '%')
1725                         break;
1726                 placeholder++;
1727                 total_consumed++;
1728         }
1729         len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);
1730
1731         if (c->flush_type == flush_left_and_steal) {
1732                 const char *ch = sb->buf + sb->len - 1;
1733                 while (len > padding && ch > sb->buf) {
1734                         const char *p;
1735                         if (*ch == ' ') {
1736                                 ch--;
1737                                 padding++;
1738                                 continue;
1739                         }
1740                         /* check for trailing ansi sequences */
1741                         if (*ch != 'm')
1742                                 break;
1743                         p = ch - 1;
1744                         while (p > sb->buf && ch - p < 10 && *p != '\033')
1745                                 p--;
1746                         if (*p != '\033' ||
1747                             ch + 1 - p != display_mode_esc_sequence_len(p))
1748                                 break;
1749                         /*
1750                          * got a good ansi sequence, put it back to
1751                          * local_sb as we're cutting sb
1752                          */
1753                         strbuf_insert(&local_sb, 0, p, ch + 1 - p);
1754                         ch = p - 1;
1755                 }
1756                 strbuf_setlen(sb, ch + 1 - sb->buf);
1757                 c->flush_type = flush_left;
1758         }
1759
1760         if (len > padding) {
1761                 switch (c->truncate) {
1762                 case trunc_left:
1763                         strbuf_utf8_replace(&local_sb,
1764                                             0, len - (padding - 2),
1765                                             "..");
1766                         break;
1767                 case trunc_middle:
1768                         strbuf_utf8_replace(&local_sb,
1769                                             padding / 2 - 1,
1770                                             len - (padding - 2),
1771                                             "..");
1772                         break;
1773                 case trunc_right:
1774                         strbuf_utf8_replace(&local_sb,
1775                                             padding - 2, len - (padding - 2),
1776                                             "..");
1777                         break;
1778                 case trunc_none:
1779                         break;
1780                 }
1781                 strbuf_addbuf(sb, &local_sb);
1782         } else {
1783                 size_t sb_len = sb->len, offset = 0;
1784                 if (c->flush_type == flush_left)
1785                         offset = padding - len;
1786                 else if (c->flush_type == flush_both)
1787                         offset = (padding - len) / 2;
1788                 /*
1789                  * we calculate padding in columns, now
1790                  * convert it back to chars
1791                  */
1792                 padding = padding - len + local_sb.len;
1793                 strbuf_addchars(sb, ' ', padding);
1794                 memcpy(sb->buf + sb_len + offset, local_sb.buf,
1795                        local_sb.len);
1796         }
1797         strbuf_release(&local_sb);
1798         c->flush_type = no_flush;
1799         return total_consumed;
1800 }
1801
1802 static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
1803                                  const char *placeholder,
1804                                  void *context)
1805 {
1806         size_t consumed, orig_len;
1807         enum {
1808                 NO_MAGIC,
1809                 ADD_LF_BEFORE_NON_EMPTY,
1810                 DEL_LF_BEFORE_EMPTY,
1811                 ADD_SP_BEFORE_NON_EMPTY
1812         } magic = NO_MAGIC;
1813
1814         switch (placeholder[0]) {
1815         case '-':
1816                 magic = DEL_LF_BEFORE_EMPTY;
1817                 break;
1818         case '+':
1819                 magic = ADD_LF_BEFORE_NON_EMPTY;
1820                 break;
1821         case ' ':
1822                 magic = ADD_SP_BEFORE_NON_EMPTY;
1823                 break;
1824         default:
1825                 break;
1826         }
1827         if (magic != NO_MAGIC) {
1828                 placeholder++;
1829
1830                 switch (placeholder[0]) {
1831                 case 'w':
1832                         /*
1833                          * `%+w()` cannot ever expand to a non-empty string,
1834                          * and it potentially changes the layout of preceding
1835                          * contents. We're thus not able to handle the magic in
1836                          * this combination and refuse the pattern.
1837                          */
1838                         return 0;
1839                 };
1840         }
1841
1842         orig_len = sb->len;
1843         if (((struct format_commit_context *)context)->flush_type != no_flush)
1844                 consumed = format_and_pad_commit(sb, placeholder, context);
1845         else
1846                 consumed = format_commit_one(sb, placeholder, context);
1847         if (magic == NO_MAGIC)
1848                 return consumed;
1849
1850         if ((orig_len == sb->len) && magic == DEL_LF_BEFORE_EMPTY) {
1851                 while (sb->len && sb->buf[sb->len - 1] == '\n')
1852                         strbuf_setlen(sb, sb->len - 1);
1853         } else if (orig_len != sb->len) {
1854                 if (magic == ADD_LF_BEFORE_NON_EMPTY)
1855                         strbuf_insertstr(sb, orig_len, "\n");
1856                 else if (magic == ADD_SP_BEFORE_NON_EMPTY)
1857                         strbuf_insertstr(sb, orig_len, " ");
1858         }
1859         return consumed + 1;
1860 }
1861
1862 static size_t userformat_want_item(struct strbuf *sb UNUSED,
1863                                    const char *placeholder,
1864                                    void *context)
1865 {
1866         struct userformat_want *w = context;
1867
1868         if (*placeholder == '+' || *placeholder == '-' || *placeholder == ' ')
1869                 placeholder++;
1870
1871         switch (*placeholder) {
1872         case 'N':
1873                 w->notes = 1;
1874                 break;
1875         case 'S':
1876                 w->source = 1;
1877                 break;
1878         case 'd':
1879         case 'D':
1880                 w->decorate = 1;
1881                 break;
1882         }
1883         return 0;
1884 }
1885
1886 void userformat_find_requirements(const char *fmt, struct userformat_want *w)
1887 {
1888         struct strbuf dummy = STRBUF_INIT;
1889
1890         if (!fmt) {
1891                 if (!user_format)
1892                         return;
1893                 fmt = user_format;
1894         }
1895         strbuf_expand(&dummy, fmt, userformat_want_item, w);
1896         strbuf_release(&dummy);
1897 }
1898
1899 void repo_format_commit_message(struct repository *r,
1900                                 const struct commit *commit,
1901                                 const char *format, struct strbuf *sb,
1902                                 const struct pretty_print_context *pretty_ctx)
1903 {
1904         struct format_commit_context context = {
1905                 .repository = r,
1906                 .commit = commit,
1907                 .pretty_ctx = pretty_ctx,
1908                 .wrap_start = sb->len
1909         };
1910         const char *output_enc = pretty_ctx->output_encoding;
1911         const char *utf8 = "UTF-8";
1912
1913         strbuf_expand(sb, format, format_commit_item, &context);
1914         rewrap_message_tail(sb, &context, 0, 0, 0);
1915
1916         /*
1917          * Convert output to an actual output encoding; note that
1918          * format_commit_item() will always use UTF-8, so we don't
1919          * have to bother if that's what the output wants.
1920          */
1921         if (output_enc) {
1922                 if (same_encoding(utf8, output_enc))
1923                         output_enc = NULL;
1924         } else {
1925                 if (context.commit_encoding &&
1926                     !same_encoding(context.commit_encoding, utf8))
1927                         output_enc = context.commit_encoding;
1928         }
1929
1930         if (output_enc) {
1931                 size_t outsz;
1932                 char *out = reencode_string_len(sb->buf, sb->len,
1933                                                 output_enc, utf8, &outsz);
1934                 if (out)
1935                         strbuf_attach(sb, out, outsz, outsz + 1);
1936         }
1937
1938         free(context.commit_encoding);
1939         repo_unuse_commit_buffer(r, commit, context.message);
1940 }
1941
1942 static void pp_header(struct pretty_print_context *pp,
1943                       const char *encoding,
1944                       const struct commit *commit,
1945                       const char **msg_p,
1946                       struct strbuf *sb)
1947 {
1948         int parents_shown = 0;
1949
1950         for (;;) {
1951                 const char *name, *line = *msg_p;
1952                 int linelen = get_one_line(*msg_p);
1953
1954                 if (!linelen)
1955                         return;
1956                 *msg_p += linelen;
1957
1958                 if (linelen == 1)
1959                         /* End of header */
1960                         return;
1961
1962                 if (pp->fmt == CMIT_FMT_RAW) {
1963                         strbuf_add(sb, line, linelen);
1964                         continue;
1965                 }
1966
1967                 if (starts_with(line, "parent ")) {
1968                         if (linelen != the_hash_algo->hexsz + 8)
1969                                 die("bad parent line in commit");
1970                         continue;
1971                 }
1972
1973                 if (!parents_shown) {
1974                         unsigned num = commit_list_count(commit->parents);
1975                         /* with enough slop */
1976                         strbuf_grow(sb, num * (GIT_MAX_HEXSZ + 10) + 20);
1977                         add_merge_info(pp, sb, commit);
1978                         parents_shown = 1;
1979                 }
1980
1981                 /*
1982                  * MEDIUM == DEFAULT shows only author with dates.
1983                  * FULL shows both authors but not dates.
1984                  * FULLER shows both authors and dates.
1985                  */
1986                 if (skip_prefix(line, "author ", &name)) {
1987                         strbuf_grow(sb, linelen + 80);
1988                         pp_user_info(pp, "Author", sb, name, encoding);
1989                 }
1990                 if (skip_prefix(line, "committer ", &name) &&
1991                     (pp->fmt == CMIT_FMT_FULL || pp->fmt == CMIT_FMT_FULLER)) {
1992                         strbuf_grow(sb, linelen + 80);
1993                         pp_user_info(pp, "Commit", sb, name, encoding);
1994                 }
1995         }
1996 }
1997
1998 void pp_title_line(struct pretty_print_context *pp,
1999                    const char **msg_p,
2000                    struct strbuf *sb,
2001                    const char *encoding,
2002                    int need_8bit_cte)
2003 {
2004         static const int max_length = 78; /* per rfc2047 */
2005         struct strbuf title;
2006
2007         strbuf_init(&title, 80);
2008         *msg_p = format_subject(&title, *msg_p,
2009                                 pp->preserve_subject ? "\n" : " ");
2010
2011         strbuf_grow(sb, title.len + 1024);
2012         if (pp->print_email_subject) {
2013                 if (pp->rev)
2014                         fmt_output_email_subject(sb, pp->rev);
2015                 if (pp->encode_email_headers &&
2016                     needs_rfc2047_encoding(title.buf, title.len))
2017                         add_rfc2047(sb, title.buf, title.len,
2018                                                 encoding, RFC2047_SUBJECT);
2019                 else
2020                         strbuf_add_wrapped_bytes(sb, title.buf, title.len,
2021                                          -last_line_length(sb), 1, max_length);
2022         } else {
2023                 strbuf_addbuf(sb, &title);
2024         }
2025         strbuf_addch(sb, '\n');
2026
2027         if (need_8bit_cte == 0) {
2028                 int i;
2029                 for (i = 0; i < pp->in_body_headers.nr; i++) {
2030                         if (has_non_ascii(pp->in_body_headers.items[i].string)) {
2031                                 need_8bit_cte = 1;
2032                                 break;
2033                         }
2034                 }
2035         }
2036
2037         if (need_8bit_cte > 0) {
2038                 const char *header_fmt =
2039                         "MIME-Version: 1.0\n"
2040                         "Content-Type: text/plain; charset=%s\n"
2041                         "Content-Transfer-Encoding: 8bit\n";
2042                 strbuf_addf(sb, header_fmt, encoding);
2043         }
2044         if (pp->after_subject) {
2045                 strbuf_addstr(sb, pp->after_subject);
2046         }
2047         if (cmit_fmt_is_mail(pp->fmt)) {
2048                 strbuf_addch(sb, '\n');
2049         }
2050
2051         if (pp->in_body_headers.nr) {
2052                 int i;
2053                 for (i = 0; i < pp->in_body_headers.nr; i++) {
2054                         strbuf_addstr(sb, pp->in_body_headers.items[i].string);
2055                         free(pp->in_body_headers.items[i].string);
2056                 }
2057                 string_list_clear(&pp->in_body_headers, 0);
2058                 strbuf_addch(sb, '\n');
2059         }
2060
2061         strbuf_release(&title);
2062 }
2063
/*
 * Sum the utf8_width() results for the bytes in [start, end).
 * Returns -1 as soon as utf8_width() reports a negative width or
 * resets its position pointer to NULL.
 */
static int pp_utf8_width(const char *start, const char *end)
{
        size_t left = end - start;
        int total = 0;

        while (left > 0) {
                int w = utf8_width(&start, &left);

                if (!start || w < 0)
                        return -1;
                total += w;
        }
        return total;
}
2077
2078 static void strbuf_add_tabexpand(struct strbuf *sb, struct grep_opt *opt,
2079                                  int color, int tabwidth, const char *line,
2080                                  int linelen)
2081 {
2082         const char *tab;
2083
2084         while ((tab = memchr(line, '\t', linelen)) != NULL) {
2085                 int width = pp_utf8_width(line, tab);
2086
2087                 /*
2088                  * If it wasn't well-formed utf8, or it
2089                  * had characters with badly defined
2090                  * width (control characters etc), just
2091                  * give up on trying to align things.
2092                  */
2093                 if (width < 0)
2094                         break;
2095
2096                 /* Output the data .. */
2097                 append_line_with_color(sb, opt, line, tab - line, color,
2098                                        GREP_CONTEXT_BODY,
2099                                        GREP_HEADER_FIELD_MAX);
2100
2101                 /* .. and the de-tabified tab */
2102                 strbuf_addchars(sb, ' ', tabwidth - (width % tabwidth));
2103
2104                 /* Skip over the printed part .. */
2105                 linelen -= tab + 1 - line;
2106                 line = tab + 1;
2107         }
2108
2109         /*
2110          * Print out everything after the last tab without
2111          * worrying about width - there's nothing more to
2112          * align.
2113          */
2114         append_line_with_color(sb, opt, line, linelen, color, GREP_CONTEXT_BODY,
2115                                GREP_HEADER_FIELD_MAX);
2116 }
2117
2118 /*
2119  * pp_handle_indent() prints out the intendation, and
2120  * the whole line (without the final newline), after
2121  * de-tabifying.
2122  */
2123 static void pp_handle_indent(struct pretty_print_context *pp,
2124                              struct strbuf *sb, int indent,
2125                              const char *line, int linelen)
2126 {
2127         struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
2128
2129         strbuf_addchars(sb, ' ', indent);
2130         if (pp->expand_tabs_in_log)
2131                 strbuf_add_tabexpand(sb, opt, pp->color, pp->expand_tabs_in_log,
2132                                      line, linelen);
2133         else
2134                 append_line_with_color(sb, opt, line, linelen, pp->color,
2135                                        GREP_CONTEXT_BODY,
2136                                        GREP_HEADER_FIELD_MAX);
2137 }
2138
/*
 * Return 1 when "line", after any number of leading '>' characters,
 * starts with "From " and is longer than four bytes; 0 otherwise.
 */
static int is_mboxrd_from(const char *line, int len)
{
        const char *unquoted;

        /*
         * a line matching /^From $/ would only have len == 4 here,
         * because is_empty_line would have trimmed all trailing space
         */
        if (len <= 4)
                return 0;

        unquoted = line + strspn(line, ">");
        return starts_with(unquoted, "From ") != 0;
}
2148
2149 void pp_remainder(struct pretty_print_context *pp,
2150                   const char **msg_p,
2151                   struct strbuf *sb,
2152                   int indent)
2153 {
2154         struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
2155         int first = 1;
2156
2157         for (;;) {
2158                 const char *line = *msg_p;
2159                 int linelen = get_one_line(line);
2160                 *msg_p += linelen;
2161
2162                 if (!linelen)
2163                         break;
2164
2165                 if (is_blank_line(line, &linelen)) {
2166                         if (first)
2167                                 continue;
2168                         if (pp->fmt == CMIT_FMT_SHORT)
2169                                 break;
2170                 }
2171                 first = 0;
2172
2173                 strbuf_grow(sb, linelen + indent + 20);
2174                 if (indent)
2175                         pp_handle_indent(pp, sb, indent, line, linelen);
2176                 else if (pp->expand_tabs_in_log)
2177                         strbuf_add_tabexpand(sb, opt, pp->color,
2178                                              pp->expand_tabs_in_log, line,
2179                                              linelen);
2180                 else {
2181                         if (pp->fmt == CMIT_FMT_MBOXRD &&
2182                                         is_mboxrd_from(line, linelen))
2183                                 strbuf_addch(sb, '>');
2184
2185                         append_line_with_color(sb, opt, line, linelen,
2186                                                pp->color, GREP_CONTEXT_BODY,
2187                                                GREP_HEADER_FIELD_MAX);
2188                 }
2189                 strbuf_addch(sb, '\n');
2190         }
2191 }
2192
2193 void pretty_print_commit(struct pretty_print_context *pp,
2194                          const struct commit *commit,
2195                          struct strbuf *sb)
2196 {
2197         unsigned long beginning_of_body;
2198         int indent = 4;
2199         const char *msg;
2200         const char *reencoded;
2201         const char *encoding;
2202         int need_8bit_cte = pp->need_8bit_cte;
2203
2204         if (pp->fmt == CMIT_FMT_USERFORMAT) {
2205                 format_commit_message(commit, user_format, sb, pp);
2206                 return;
2207         }
2208
2209         encoding = get_log_output_encoding();
2210         msg = reencoded = logmsg_reencode(commit, NULL, encoding);
2211
2212         if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt))
2213                 indent = 0;
2214
2215         /*
2216          * We need to check and emit Content-type: to mark it
2217          * as 8-bit if we haven't done so.
2218          */
2219         if (cmit_fmt_is_mail(pp->fmt) && need_8bit_cte == 0) {
2220                 int i, ch, in_body;
2221
2222                 for (in_body = i = 0; (ch = msg[i]); i++) {
2223                         if (!in_body) {
2224                                 /* author could be non 7-bit ASCII but
2225                                  * the log may be so; skip over the
2226                                  * header part first.
2227                                  */
2228                                 if (ch == '\n' && msg[i+1] == '\n')
2229                                         in_body = 1;
2230                         }
2231                         else if (non_ascii(ch)) {
2232                                 need_8bit_cte = 1;
2233                                 break;
2234                         }
2235                 }
2236         }
2237
2238         pp_header(pp, encoding, commit, &msg, sb);
2239         if (pp->fmt != CMIT_FMT_ONELINE && !pp->print_email_subject) {
2240                 strbuf_addch(sb, '\n');
2241         }
2242
2243         /* Skip excess blank lines at the beginning of body, if any... */
2244         msg = skip_blank_lines(msg);
2245
2246         /* These formats treat the title line specially. */
2247         if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt))
2248                 pp_title_line(pp, &msg, sb, encoding, need_8bit_cte);
2249
2250         beginning_of_body = sb->len;
2251         if (pp->fmt != CMIT_FMT_ONELINE)
2252                 pp_remainder(pp, &msg, sb, indent);
2253         strbuf_rtrim(sb);
2254
2255         /* Make sure there is an EOLN for the non-oneline case */
2256         if (pp->fmt != CMIT_FMT_ONELINE)
2257                 strbuf_addch(sb, '\n');
2258
2259         /*
2260          * The caller may append additional body text in e-mail
2261          * format.  Make sure we did not strip the blank line
2262          * between the header and the body.
2263          */
2264         if (cmit_fmt_is_mail(pp->fmt) && sb->len <= beginning_of_body)
2265                 strbuf_addch(sb, '\n');
2266
2267         unuse_commit_buffer(commit, reencoded);
2268 }
2269
2270 void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,
2271                     struct strbuf *sb)
2272 {
2273         struct pretty_print_context pp = {0};
2274         pp.fmt = fmt;
2275         pretty_print_commit(&pp, commit, sb);
2276 }