pretty.c

   1 #include "git-compat-util.h"
   2 #include "alloc.h"
   3 #include "config.h"
   4 #include "commit.h"
   5 #include "environment.h"
   6 #include "gettext.h"
   7 #include "hex.h"
   8 #include "utf8.h"
   9 #include "diff.h"
  10 #include "pager.h"
  11 #include "revision.h"
  12 #include "string-list.h"
  13 #include "mailmap.h"
  14 #include "log-tree.h"
  15 #include "notes.h"
  16 #include "color.h"
  17 #include "reflog-walk.h"
  18 #include "gpg-interface.h"
  19 #include "trailer.h"
  20 #include "run-command.h"
  21
  22 /*
  23  * The limit for formatting directives, which enable the caller to append
  24  * arbitrarily many bytes to the formatted buffer. This includes padding
  25  * and wrapping formatters.
  26  */
  27 #define FORMATTING_LIMIT (16 * 1024)
  28
  29 static char *user_format;
  30 static struct cmt_fmt_map {
  31         const char *name;
  32         enum cmit_fmt format;
  33         int is_tformat;
  34         int expand_tabs_in_log;
  35         int is_alias;
  36         enum date_mode_type default_date_mode_type;
  37         const char *user_format;
  38 } *commit_formats;
  39 static size_t builtin_formats_len;
  40 static size_t commit_formats_len;
  41 static size_t commit_formats_alloc;
  42 static struct cmt_fmt_map *find_commit_format(const char *sought);
  43
  44 int commit_format_is_empty(enum cmit_fmt fmt)
  45 {
  46         return fmt == CMIT_FMT_USERFORMAT && !*user_format;
  47 }
  48
  49 static void save_user_format(struct rev_info *rev, const char *cp, int is_tformat)
  50 {
  51         free(user_format);
  52         user_format = xstrdup(cp);
  53         if (is_tformat)
  54                 rev->use_terminator = 1;
  55         rev->commit_format = CMIT_FMT_USERFORMAT;
  56 }
  57
  58 static int git_pretty_formats_config(const char *var, const char *value,
  59                                      void *cb UNUSED)
  60 {
  61         struct cmt_fmt_map *commit_format = NULL;
  62         const char *name;
  63         const char *fmt;
  64         int i;
  65
  66         if (!skip_prefix(var, "pretty.", &name))
  67                 return 0;
  68
  69         for (i = 0; i < builtin_formats_len; i++) {
  70                 if (!strcmp(commit_formats[i].name, name))
  71                         return 0;
  72         }
  73
  74         for (i = builtin_formats_len; i < commit_formats_len; i++) {
  75                 if (!strcmp(commit_formats[i].name, name)) {
  76                         commit_format = &commit_formats[i];
  77                         break;
  78                 }
  79         }
  80
  81         if (!commit_format) {
  82                 ALLOC_GROW(commit_formats, commit_formats_len+1,
  83                            commit_formats_alloc);
  84                 commit_format = &commit_formats[commit_formats_len];
  85                 memset(commit_format, 0, sizeof(*commit_format));
  86                 commit_formats_len++;
  87         }
  88
  89         commit_format->name = xstrdup(name);
  90         commit_format->format = CMIT_FMT_USERFORMAT;
  91         if (git_config_string(&fmt, var, value))
  92                 return -1;
  93
  94         if (skip_prefix(fmt, "format:", &fmt))
  95                 commit_format->is_tformat = 0;
  96         else if (skip_prefix(fmt, "tformat:", &fmt) || strchr(fmt, '%'))
  97                 commit_format->is_tformat = 1;
  98         else
  99                 commit_format->is_alias = 1;
 100         commit_format->user_format = fmt;
 101
 102         return 0;
 103 }
 104
 105 static void setup_commit_formats(void)
 106 {
 107         struct cmt_fmt_map builtin_formats[] = {
 108                 { "raw",        CMIT_FMT_RAW,           0,      0 },
 109                 { "medium",     CMIT_FMT_MEDIUM,        0,      8 },
 110                 { "short",      CMIT_FMT_SHORT,         0,      0 },
 111                 { "email",      CMIT_FMT_EMAIL,         0,      0 },
 112                 { "mboxrd",     CMIT_FMT_MBOXRD,        0,      0 },
 113                 { "fuller",     CMIT_FMT_FULLER,        0,      8 },
 114                 { "full",       CMIT_FMT_FULL,          0,      8 },
 115                 { "oneline",    CMIT_FMT_ONELINE,       1,      0 },
 116                 { "reference",  CMIT_FMT_USERFORMAT,    1,      0,
 117                         0, DATE_SHORT, "%C(auto)%h (%s, %ad)" },
 118                 /*
 119                  * Please update $__git_log_pretty_formats in
 120                  * git-completion.bash when you add new formats.
 121                  */
 122         };
 123         commit_formats_len = ARRAY_SIZE(builtin_formats);
 124         builtin_formats_len = commit_formats_len;
 125         ALLOC_GROW(commit_formats, commit_formats_len, commit_formats_alloc);
 126         COPY_ARRAY(commit_formats, builtin_formats,
 127                    ARRAY_SIZE(builtin_formats));
 128
 129         git_config(git_pretty_formats_config, NULL);
 130 }
 131
 132 static struct cmt_fmt_map *find_commit_format_recursive(const char *sought,
 133                                                         const char *original,
 134                                                         int num_redirections)
 135 {
 136         struct cmt_fmt_map *found = NULL;
 137         size_t found_match_len = 0;
 138         int i;
 139
 140         if (num_redirections >= commit_formats_len)
 141                 die("invalid --pretty format: "
 142                     "'%s' references an alias which points to itself",
 143                     original);
 144
 145         for (i = 0; i < commit_formats_len; i++) {
 146                 size_t match_len;
 147
 148                 if (!starts_with(commit_formats[i].name, sought))
 149                         continue;
 150
 151                 match_len = strlen(commit_formats[i].name);
 152                 if (found == NULL || found_match_len > match_len) {
 153                         found = &commit_formats[i];
 154                         found_match_len = match_len;
 155                 }
 156         }
 157
 158         if (found && found->is_alias) {
 159                 found = find_commit_format_recursive(found->user_format,
 160                                                      original,
 161                                                      num_redirections+1);
 162         }
 163
 164         return found;
 165 }
 166
 167 static struct cmt_fmt_map *find_commit_format(const char *sought)
 168 {
 169         if (!commit_formats)
 170                 setup_commit_formats();
 171
 172         return find_commit_format_recursive(sought, sought, 0);
 173 }
 174
 175 void get_commit_format(const char *arg, struct rev_info *rev)
 176 {
 177         struct cmt_fmt_map *commit_format;
 178
 179         rev->use_terminator = 0;
 180         if (!arg) {
 181                 rev->commit_format = CMIT_FMT_DEFAULT;
 182                 return;
 183         }
 184         if (skip_prefix(arg, "format:", &arg)) {
 185                 save_user_format(rev, arg, 0);
 186                 return;
 187         }
 188
 189         if (!*arg || skip_prefix(arg, "tformat:", &arg) || strchr(arg, '%')) {
 190                 save_user_format(rev, arg, 1);
 191                 return;
 192         }
 193
 194         commit_format = find_commit_format(arg);
 195         if (!commit_format)
 196                 die("invalid --pretty format: %s", arg);
 197
 198         rev->commit_format = commit_format->format;
 199         rev->use_terminator = commit_format->is_tformat;
 200         rev->expand_tabs_in_log_default = commit_format->expand_tabs_in_log;
 201         if (!rev->date_mode_explicit && commit_format->default_date_mode_type)
 202                 rev->date_mode.type = commit_format->default_date_mode_type;
 203         if (commit_format->format == CMIT_FMT_USERFORMAT) {
 204                 save_user_format(rev, commit_format->user_format,
 205                                  commit_format->is_tformat);
 206         }
 207 }
 208
 209 /*
 210  * Generic support for pretty-printing the header
 211  */
 212 static int get_one_line(const char *msg)
 213 {
 214         int ret = 0;
 215
 216         for (;;) {
 217                 char c = *msg++;
 218                 if (!c)
 219                         break;
 220                 ret++;
 221                 if (c == '\n')
 222                         break;
 223         }
 224         return ret;
 225 }
 226
 227 /* High bit set, or ISO-2022-INT */
 228 static int non_ascii(int ch)
 229 {
 230         return !isascii(ch) || ch == '\033';
 231 }
 232
 233 int has_non_ascii(const char *s)
 234 {
 235         int ch;
 236         if (!s)
 237                 return 0;
 238         while ((ch = *s++) != '\0') {
 239                 if (non_ascii(ch))
 240                         return 1;
 241         }
 242         return 0;
 243 }
 244
 245 static int is_rfc822_special(char ch)
 246 {
 247         switch (ch) {
 248         case '(':
 249         case ')':
 250         case '<':
 251         case '>':
 252         case '[':
 253         case ']':
 254         case ':':
 255         case ';':
 256         case '@':
 257         case ',':
 258         case '.':
 259         case '"':
 260         case '\\':
 261                 return 1;
 262         default:
 263                 return 0;
 264         }
 265 }
 266
 267 static int needs_rfc822_quoting(const char *s, int len)
 268 {
 269         int i;
 270         for (i = 0; i < len; i++)
 271                 if (is_rfc822_special(s[i]))
 272                         return 1;
 273         return 0;
 274 }
 275
 276 static int last_line_length(struct strbuf *sb)
 277 {
 278         int i;
 279
 280         /* How many bytes are already used on the last line? */
 281         for (i = sb->len - 1; i >= 0; i--)
 282                 if (sb->buf[i] == '\n')
 283                         break;
 284         return sb->len - (i + 1);
 285 }
 286
 287 static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
 288 {
 289         int i;
 290
 291         /* just a guess, we may have to also backslash-quote */
 292         strbuf_grow(out, len + 2);
 293
 294         strbuf_addch(out, '"');
 295         for (i = 0; i < len; i++) {
 296                 switch (s[i]) {
 297                 case '"':
 298                 case '\\':
 299                         strbuf_addch(out, '\\');
 300                         /* fall through */
 301                 default:
 302                         strbuf_addch(out, s[i]);
 303                 }
 304         }
 305         strbuf_addch(out, '"');
 306 }
 307
 308 enum rfc2047_type {
 309         RFC2047_SUBJECT,
 310         RFC2047_ADDRESS
 311 };
 312
 313 static int is_rfc2047_special(char ch, enum rfc2047_type type)
 314 {
 315         /*
 316          * rfc2047, section 4.2:
 317          *
 318          *    8-bit values which correspond to printable ASCII characters other
 319          *    than "=", "?", and "_" (underscore), MAY be represented as those
 320          *    characters.  (But see section 5 for restrictions.)  In
 321          *    particular, SPACE and TAB MUST NOT be represented as themselves
 322          *    within encoded words.
 323          */
 324
 325         /*
 326          * rule out non-ASCII characters and non-printable characters (the
 327          * non-ASCII check should be redundant as isprint() is not localized
 328          * and only knows about ASCII, but be defensive about that)
 329          */
 330         if (non_ascii(ch) || !isprint(ch))
 331                 return 1;
 332
 333         /*
 334          * rule out special printable characters (' ' should be the only
 335          * whitespace character considered printable, but be defensive and use
 336          * isspace())
 337          */
 338         if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
 339                 return 1;
 340
 341         /*
 342          * rfc2047, section 5.3:
 343          *
 344          *    As a replacement for a 'word' entity within a 'phrase', for example,
 345          *    one that precedes an address in a From, To, or Cc header.  The ABNF
 346          *    definition for 'phrase' from RFC 822 thus becomes:
 347          *
 348          *    phrase = 1*( encoded-word / word )
 349          *
 350          *    In this case the set of characters that may be used in a "Q"-encoded
 351          *    'encoded-word' is restricted to: <upper and lower case ASCII
 352          *    letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
 353          *    (underscore, ASCII 95.)>.  An 'encoded-word' that appears within a
 354          *    'phrase' MUST be separated from any adjacent 'word', 'text' or
 355          *    'special' by 'linear-white-space'.
 356          */
 357
 358         if (type != RFC2047_ADDRESS)
 359                 return 0;
 360
 361         /* '=' and '_' are special cases and have been checked above */
 362         return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
 363 }
 364
 365 static int needs_rfc2047_encoding(const char *line, int len)
 366 {
 367         int i;
 368
 369         for (i = 0; i < len; i++) {
 370                 int ch = line[i];
 371                 if (non_ascii(ch) || ch == '\n')
 372                         return 1;
 373                 if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
 374                         return 1;
 375         }
 376
 377         return 0;
 378 }
 379
 380 static void add_rfc2047(struct strbuf *sb, const char *line, size_t len,
 381                        const char *encoding, enum rfc2047_type type)
 382 {
 383         static const int max_encoded_length = 76; /* per rfc2047 */
 384         int i;
 385         int line_len = last_line_length(sb);
 386
 387         strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
 388         strbuf_addf(sb, "=?%s?q?", encoding);
 389         line_len += strlen(encoding) + 5; /* 5 for =??q? */
 390
 391         while (len) {
 392                 /*
 393                  * RFC 2047, section 5 (3):
 394                  *
 395                  * Each 'encoded-word' MUST represent an integral number of
 396                  * characters.  A multi-octet character may not be split across
 397                  * adjacent 'encoded- word's.
 398                  */
 399                 const unsigned char *p = (const unsigned char *)line;
 400                 int chrlen = mbs_chrlen(&line, &len, encoding);
 401                 int is_special = (chrlen > 1) || is_rfc2047_special(*p, type);
 402
 403                 /* "=%02X" * chrlen, or the byte itself */
 404                 const char *encoded_fmt = is_special ? "=%02X"    : "%c";
 405                 int         encoded_len = is_special ? 3 * chrlen : 1;
 406
 407                 /*
 408                  * According to RFC 2047, we could encode the special character
 409                  * ' ' (space) with '_' (underscore) for readability. But many
 410                  * programs do not understand this and just leave the
 411                  * underscore in place. Thus, we do nothing special here, which
 412                  * causes ' ' to be encoded as '=20', avoiding this problem.
 413                  */
 414
 415                 if (line_len + encoded_len + 2 > max_encoded_length) {
 416                         /* It won't fit with trailing "?=" --- break the line */
 417                         strbuf_addf(sb, "?=\n =?%s?q?", encoding);
 418                         line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */
 419                 }
 420
 421                 for (i = 0; i < chrlen; i++)
 422                         strbuf_addf(sb, encoded_fmt, p[i]);
 423                 line_len += encoded_len;
 424         }
 425         strbuf_addstr(sb, "?=");
 426 }
 427
 428 const char *show_ident_date(const struct ident_split *ident,
 429                             const struct date_mode *mode)
 430 {
 431         timestamp_t date = 0;
 432         long tz = 0;
 433
 434         if (ident->date_begin && ident->date_end)
 435                 date = parse_timestamp(ident->date_begin, NULL, 10);
 436         if (date_overflows(date))
 437                 date = 0;
 438         else {
 439                 if (ident->tz_begin && ident->tz_end)
 440                         tz = strtol(ident->tz_begin, NULL, 10);
 441                 if (tz >= INT_MAX || tz <= INT_MIN)
 442                         tz = 0;
 443         }
 444         return show_date(date, tz, mode);
 445 }
 446
 447 static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
 448                                          const char *buf, size_t buflen)
 449 {
 450         strbuf_addstr(sb, color);
 451         strbuf_add(sb, buf, buflen);
 452         if (*color)
 453                 strbuf_addstr(sb, GIT_COLOR_RESET);
 454 }
 455
 456 static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
 457                                    const char *line, size_t linelen,
 458                                    int color, enum grep_context ctx,
 459                                    enum grep_header_field field)
 460 {
 461         const char *buf, *eol, *line_color, *match_color;
 462         regmatch_t match;
 463         int eflags = 0;
 464
 465         buf = line;
 466         eol = buf + linelen;
 467
 468         if (!opt || !want_color(color) || opt->invert)
 469                 goto end;
 470
 471         line_color = opt->colors[GREP_COLOR_SELECTED];
 472         match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
 473
 474         while (grep_next_match(opt, buf, eol, ctx, &match, field, eflags)) {
 475                 if (match.rm_so == match.rm_eo)
 476                         break;
 477
 478                 strbuf_add_with_color(sb, line_color, buf, match.rm_so);
 479                 strbuf_add_with_color(sb, match_color, buf + match.rm_so,
 480                                       match.rm_eo - match.rm_so);
 481                 buf += match.rm_eo;
 482                 eflags = REG_NOTBOL;
 483         }
 484
 485         if (eflags)
 486                 strbuf_add_with_color(sb, line_color, buf, eol - buf);
 487         else {
 488 end:
 489                 strbuf_add(sb, buf, eol - buf);
 490         }
 491 }
 492
 493 static int use_in_body_from(const struct pretty_print_context *pp,
 494                             const struct ident_split *ident)
 495 {
 496         if (pp->rev && pp->rev->force_in_body_from)
 497                 return 1;
 498         if (ident_cmp(pp->from_ident, ident))
 499                 return 1;
 500         return 0;
 501 }
 502
 503 void pp_user_info(struct pretty_print_context *pp,
 504                   const char *what, struct strbuf *sb,
 505                   const char *line, const char *encoding)
 506 {
 507         struct ident_split ident;
 508         char *line_end;
 509         const char *mailbuf, *namebuf;
 510         size_t namelen, maillen;
 511         int max_length = 78; /* per rfc2822 */
 512
 513         if (pp->fmt == CMIT_FMT_ONELINE)
 514                 return;
 515
 516         line_end = strchrnul(line, '\n');
 517         if (split_ident_line(&ident, line, line_end - line))
 518                 return;
 519
 520         mailbuf = ident.mail_begin;
 521         maillen = ident.mail_end - ident.mail_begin;
 522         namebuf = ident.name_begin;
 523         namelen = ident.name_end - ident.name_begin;
 524
 525         if (pp->mailmap)
 526                 map_user(pp->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
 527
 528         if (cmit_fmt_is_mail(pp->fmt)) {
 529                 if (pp->from_ident && use_in_body_from(pp, &ident)) {
 530                         struct strbuf buf = STRBUF_INIT;
 531
 532                         strbuf_addstr(&buf, "From: ");
 533                         strbuf_add(&buf, namebuf, namelen);
 534                         strbuf_addstr(&buf, " <");
 535                         strbuf_add(&buf, mailbuf, maillen);
 536                         strbuf_addstr(&buf, ">\n");
 537                         string_list_append(&pp->in_body_headers,
 538                                            strbuf_detach(&buf, NULL));
 539
 540                         mailbuf = pp->from_ident->mail_begin;
 541                         maillen = pp->from_ident->mail_end - mailbuf;
 542                         namebuf = pp->from_ident->name_begin;
 543                         namelen = pp->from_ident->name_end - namebuf;
 544                 }
 545
 546                 strbuf_addstr(sb, "From: ");
 547                 if (pp->encode_email_headers &&
 548                     needs_rfc2047_encoding(namebuf, namelen)) {
 549                         add_rfc2047(sb, namebuf, namelen,
 550                                     encoding, RFC2047_ADDRESS);
 551                         max_length = 76; /* per rfc2047 */
 552                 } else if (needs_rfc822_quoting(namebuf, namelen)) {
 553                         struct strbuf quoted = STRBUF_INIT;
 554                         add_rfc822_quoted(&quoted, namebuf, namelen);
 555                         strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
 556                                                         -6, 1, max_length);
 557                         strbuf_release(&quoted);
 558                 } else {
 559                         strbuf_add_wrapped_bytes(sb, namebuf, namelen,
 560                                                  -6, 1, max_length);
 561                 }
 562
 563                 if (max_length <
 564                     last_line_length(sb) + strlen(" <") + maillen + strlen(">"))
 565                         strbuf_addch(sb, '\n');
 566                 strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
 567         } else {
 568                 struct strbuf id = STRBUF_INIT;
 569                 enum grep_header_field field = GREP_HEADER_FIELD_MAX;
 570                 struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
 571
 572                 if (!strcmp(what, "Author"))
 573                         field = GREP_HEADER_AUTHOR;
 574                 else if (!strcmp(what, "Commit"))
 575                         field = GREP_HEADER_COMMITTER;
 576
 577                 strbuf_addf(sb, "%s: ", what);
 578                 if (pp->fmt == CMIT_FMT_FULLER)
 579                         strbuf_addchars(sb, ' ', 4);
 580
 581                 strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
 582                             (int)maillen, mailbuf);
 583
 584                 append_line_with_color(sb, opt, id.buf, id.len, pp->color,
 585                                        GREP_CONTEXT_HEAD, field);
 586                 strbuf_addch(sb, '\n');
 587                 strbuf_release(&id);
 588         }
 589
 590         switch (pp->fmt) {
 591         case CMIT_FMT_MEDIUM:
 592                 strbuf_addf(sb, "Date:   %s\n",
 593                             show_ident_date(&ident, &pp->date_mode));
 594                 break;
 595         case CMIT_FMT_EMAIL:
 596         case CMIT_FMT_MBOXRD:
 597                 strbuf_addf(sb, "Date: %s\n",
 598                             show_ident_date(&ident, DATE_MODE(RFC2822)));
 599                 break;
 600         case CMIT_FMT_FULLER:
 601                 strbuf_addf(sb, "%sDate: %s\n", what,
 602                             show_ident_date(&ident, &pp->date_mode));
 603                 break;
 604         default:
 605                 /* notin' */
 606                 break;
 607         }
 608 }
 609
 610 static int is_blank_line(const char *line, int *len_p)
 611 {
 612         int len = *len_p;
 613         while (len && isspace(line[len - 1]))
 614                 len--;
 615         *len_p = len;
 616         return !len;
 617 }
 618
 619 const char *skip_blank_lines(const char *msg)
 620 {
 621         for (;;) {
 622                 int linelen = get_one_line(msg);
 623                 int ll = linelen;
 624                 if (!linelen)
 625                         break;
 626                 if (!is_blank_line(msg, &ll))
 627                         break;
 628                 msg += linelen;
 629         }
 630         return msg;
 631 }
 632
 633 static void add_merge_info(const struct pretty_print_context *pp,
 634                            struct strbuf *sb, const struct commit *commit)
 635 {
 636         struct commit_list *parent = commit->parents;
 637
 638         if ((pp->fmt == CMIT_FMT_ONELINE) || (cmit_fmt_is_mail(pp->fmt)) ||
 639             !parent || !parent->next)
 640                 return;
 641
 642         strbuf_addstr(sb, "Merge:");
 643
 644         while (parent) {
 645                 struct object_id *oidp = &parent->item->object.oid;
 646                 strbuf_addch(sb, ' ');
 647                 if (pp->abbrev)
 648                         strbuf_add_unique_abbrev(sb, oidp, pp->abbrev);
 649                 else
 650                         strbuf_addstr(sb, oid_to_hex(oidp));
 651                 parent = parent->next;
 652         }
 653         strbuf_addch(sb, '\n');
 654 }
 655
 656 static char *get_header(const char *msg, const char *key)
 657 {
 658         size_t len;
 659         const char *v = find_commit_header(msg, key, &len);
 660         return v ? xmemdupz(v, len) : NULL;
 661 }
 662
 663 static char *replace_encoding_header(char *buf, const char *encoding)
 664 {
 665         struct strbuf tmp = STRBUF_INIT;
 666         size_t start, len;
 667         char *cp = buf;
 668
 669         /* guess if there is an encoding header before a \n\n */
 670         while (!starts_with(cp, "encoding ")) {
 671                 cp = strchr(cp, '\n');
 672                 if (!cp || *++cp == '\n')
 673                         return buf;
 674         }
 675         start = cp - buf;
 676         cp = strchr(cp, '\n');
 677         if (!cp)
 678                 return buf; /* should not happen but be defensive */
 679         len = cp + 1 - (buf + start);
 680
 681         strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
 682         if (is_encoding_utf8(encoding)) {
 683                 /* we have re-coded to UTF-8; drop the header */
 684                 strbuf_remove(&tmp, start, len);
 685         } else {
 686                 /* just replaces XXXX in 'encoding XXXX\n' */
 687                 strbuf_splice(&tmp, start + strlen("encoding "),
 688                                           len - strlen("encoding \n"),
 689                                           encoding, strlen(encoding));
 690         }
 691         return strbuf_detach(&tmp, NULL);
 692 }
 693
 694 const char *repo_logmsg_reencode(struct repository *r,
 695                                  const struct commit *commit,
 696                                  char **commit_encoding,
 697                                  const char *output_encoding)
 698 {
 699         static const char *utf8 = "UTF-8";
 700         const char *use_encoding;
 701         char *encoding;
 702         const char *msg = repo_get_commit_buffer(r, commit, NULL);
 703         char *out;
 704
 705         if (!output_encoding || !*output_encoding) {
 706                 if (commit_encoding)
 707                         *commit_encoding = get_header(msg, "encoding");
 708                 return msg;
 709         }
 710         encoding = get_header(msg, "encoding");
 711         if (commit_encoding)
 712                 *commit_encoding = encoding;
 713         use_encoding = encoding ? encoding : utf8;
 714         if (same_encoding(use_encoding, output_encoding)) {
 715                 /*
 716                  * No encoding work to be done. If we have no encoding header
 717                  * at all, then there's nothing to do, and we can return the
 718                  * message verbatim (whether newly allocated or not).
 719                  */
 720                 if (!encoding)
 721                         return msg;
 722
 723                 /*
 724                  * Otherwise, we still want to munge the encoding header in the
 725                  * result, which will be done by modifying the buffer. If we
 726                  * are using a fresh copy, we can reuse it. But if we are using
 727                  * the cached copy from repo_get_commit_buffer, we need to duplicate it
 728                  * to avoid munging the cached copy.
 729                  */
 730                 if (msg == get_cached_commit_buffer(r, commit, NULL))
 731                         out = xstrdup(msg);
 732                 else
 733                         out = (char *)msg;
 734         }
 735         else {
 736                 /*
 737                  * There's actual encoding work to do. Do the reencoding, which
 738                  * still leaves the header to be replaced in the next step. At
 739                  * this point, we are done with msg. If we allocated a fresh
 740                  * copy, we can free it.
 741                  */
 742                 out = reencode_string(msg, output_encoding, use_encoding);
 743                 if (out)
 744                         repo_unuse_commit_buffer(r, commit, msg);
 745         }
 746
 747         /*
 748          * This replacement actually consumes the buffer we hand it, so we do
 749          * not have to worry about freeing the old "out" here.
 750          */
 751         if (out)
 752                 out = replace_encoding_header(out, output_encoding);
 753
 754         if (!commit_encoding)
 755                 free(encoding);
 756         /*
 757          * If the re-encoding failed, out might be NULL here; in that
 758          * case we just return the commit message verbatim.
 759          */
 760         return out ? out : msg;
 761 }
 762
 763 static int mailmap_name(const char **email, size_t *email_len,
 764                         const char **name, size_t *name_len)
 765 {
 766         static struct string_list *mail_map;
 767         if (!mail_map) {
 768                 CALLOC_ARRAY(mail_map, 1);
 769                 read_mailmap(mail_map);
 770         }
 771         return mail_map->nr && map_user(mail_map, email, email_len, name, name_len);
 772 }
 773
 774 static size_t format_person_part(struct strbuf *sb, char part,
 775                                  const char *msg, int len,
 776                                  const struct date_mode *dmode)
 777 {
 778         /* currently all placeholders have same length */
 779         const int placeholder_len = 2;
 780         struct ident_split s;
 781         const char *name, *mail;
 782         size_t maillen, namelen;
 783
 784         if (split_ident_line(&s, msg, len) < 0)
 785                 goto skip;
 786
 787         name = s.name_begin;
 788         namelen = s.name_end - s.name_begin;
 789         mail = s.mail_begin;
 790         maillen = s.mail_end - s.mail_begin;
 791
 792         if (part == 'N' || part == 'E' || part == 'L') /* mailmap lookup */
 793                 mailmap_name(&mail, &maillen, &name, &namelen);
 794         if (part == 'n' || part == 'N') {       /* name */
 795                 strbuf_add(sb, name, namelen);
 796                 return placeholder_len;
 797         }
 798         if (part == 'e' || part == 'E') {       /* email */
 799                 strbuf_add(sb, mail, maillen);
 800                 return placeholder_len;
 801         }
 802         if (part == 'l' || part == 'L') {       /* local-part */
 803                 const char *at = memchr(mail, '@', maillen);
 804                 if (at)
 805                         maillen = at - mail;
 806                 strbuf_add(sb, mail, maillen);
 807                 return placeholder_len;
 808         }
 809
 810         if (!s.date_begin)
 811                 goto skip;
 812
 813         if (part == 't') {      /* date, UNIX timestamp */
 814                 strbuf_add(sb, s.date_begin, s.date_end - s.date_begin);
 815                 return placeholder_len;
 816         }
 817
 818         switch (part) {
 819         case 'd':       /* date */
 820                 strbuf_addstr(sb, show_ident_date(&s, dmode));
 821                 return placeholder_len;
 822         case 'D':       /* date, RFC2822 style */
 823                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(RFC2822)));
 824                 return placeholder_len;
 825         case 'r':       /* date, relative */
 826                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(RELATIVE)));
 827                 return placeholder_len;
 828         case 'i':       /* date, ISO 8601-like */
 829                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(ISO8601)));
 830                 return placeholder_len;
 831         case 'I':       /* date, ISO 8601 strict */
 832                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(ISO8601_STRICT)));
 833                 return placeholder_len;
 834         case 'h':       /* date, human */
 835                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(HUMAN)));
 836                 return placeholder_len;
 837         case 's':
 838                 strbuf_addstr(sb, show_ident_date(&s, DATE_MODE(SHORT)));
 839                 return placeholder_len;
 840         }
 841
 842 skip:
 843         /*
 844          * reading from either a bogus commit, or a reflog entry with
 845          * %gn, %ge, etc.; 'sb' cannot be updated, but we still need
 846          * to compute a valid return value.
 847          */
 848         if (part == 'n' || part == 'e' || part == 't' || part == 'd'
 849             || part == 'D' || part == 'r' || part == 'i')
 850                 return placeholder_len;
 851
 852         return 0; /* unknown placeholder */
 853 }
 854
/* A byte range described by its starting offset and its length. */
struct chunk {
        size_t off;     /* offset of the first byte of the range */
        size_t len;     /* number of bytes in the range */
};
 859
/*
 * Padding mode requested by a padding placeholder; the mapping from
 * placeholder spelling to value is done in parse_padding_placeholder().
 */
enum flush_type {
        no_flush,               /* no padding placeholder is pending */
        flush_right,            /* requested with "<" */
        flush_left,             /* requested with ">" */
        flush_left_and_steal,   /* requested with ">>" */
        flush_both              /* requested with "><" */
};
 867
/*
 * Truncation mode given as the optional second argument of a padding
 * placeholder; see parse_padding_placeholder().
 */
enum trunc_type {
        trunc_none,     /* no truncation argument was given */
        trunc_left,     /* "ltrunc" */
        trunc_middle,   /* "mtrunc" */
        trunc_right     /* "trunc" */
};
 874
/*
 * State carried across the expansion of all placeholders of one format
 * string for one commit.
 */
struct format_commit_context {
        struct repository *repository;
        const struct commit *commit;
        const struct pretty_print_context *pretty_ctx;
        /* set once parse_commit_header() has filled in the header offsets */
        unsigned commit_header_parsed:1;
        /* set once parse_commit_message() has filled in subject/body offsets */
        unsigned commit_message_parsed:1;
        struct signature_check signature_check;
        /* pending padding request, recorded by parse_padding_placeholder() */
        enum flush_type flush_type;
        enum trunc_type truncate;
        /* the commit buffer that the offsets below index into */
        const char *message;
        char *commit_encoding;
        /* wrapping parameters currently in effect; see rewrap_message_tail() */
        size_t width, indent1, indent2;
        /* result of want_color() at the last "%C(auto)", cleared by other %C */
        int auto_color;
        /* padding width; stored negated when given in the "to column" form */
        int padding;

        /* These offsets are relative to the start of the commit message. */
        struct chunk author;
        struct chunk committer;
        size_t message_off;
        size_t subject_off;
        size_t body_off;

        /* The following ones are relative to the result struct strbuf. */
        size_t wrap_start;
};
 900
 901 static void parse_commit_header(struct format_commit_context *context)
 902 {
 903         const char *msg = context->message;
 904         int i;
 905
 906         for (i = 0; msg[i]; i++) {
 907                 const char *name;
 908                 int eol;
 909                 for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
 910                         ; /* do nothing */
 911
 912                 if (i == eol) {
 913                         break;
 914                 } else if (skip_prefix(msg + i, "author ", &name)) {
 915                         context->author.off = name - msg;
 916                         context->author.len = msg + eol - name;
 917                 } else if (skip_prefix(msg + i, "committer ", &name)) {
 918                         context->committer.off = name - msg;
 919                         context->committer.len = msg + eol - name;
 920                 }
 921                 i = eol;
 922         }
 923         context->message_off = i;
 924         context->commit_header_parsed = 1;
 925 }
 926
 927 static int istitlechar(char c)
 928 {
 929         return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
 930                 (c >= '0' && c <= '9') || c == '.' || c == '_';
 931 }
 932
 933 void format_sanitized_subject(struct strbuf *sb, const char *msg, size_t len)
 934 {
 935         size_t trimlen;
 936         size_t start_len = sb->len;
 937         int space = 2;
 938         int i;
 939
 940         for (i = 0; i < len; i++) {
 941                 if (istitlechar(msg[i])) {
 942                         if (space == 1)
 943                                 strbuf_addch(sb, '-');
 944                         space = 0;
 945                         strbuf_addch(sb, msg[i]);
 946                         if (msg[i] == '.')
 947                                 while (msg[i+1] == '.')
 948                                         i++;
 949                 } else
 950                         space |= 1;
 951         }
 952
 953         /* trim any trailing '.' or '-' characters */
 954         trimlen = 0;
 955         while (sb->len - trimlen > start_len &&
 956                 (sb->buf[sb->len - 1 - trimlen] == '.'
 957                 || sb->buf[sb->len - 1 - trimlen] == '-'))
 958                 trimlen++;
 959         strbuf_remove(sb, sb->len - trimlen, trimlen);
 960 }
 961
 962 const char *format_subject(struct strbuf *sb, const char *msg,
 963                            const char *line_separator)
 964 {
 965         int first = 1;
 966
 967         for (;;) {
 968                 const char *line = msg;
 969                 int linelen = get_one_line(line);
 970
 971                 msg += linelen;
 972                 if (!linelen || is_blank_line(line, &linelen))
 973                         break;
 974
 975                 if (!sb)
 976                         continue;
 977                 strbuf_grow(sb, linelen + 2);
 978                 if (!first)
 979                         strbuf_addstr(sb, line_separator);
 980                 strbuf_add(sb, line, linelen);
 981                 first = 0;
 982         }
 983         return msg;
 984 }
 985
 986 static void parse_commit_message(struct format_commit_context *c)
 987 {
 988         const char *msg = c->message + c->message_off;
 989         const char *start = c->message;
 990
 991         msg = skip_blank_lines(msg);
 992         c->subject_off = msg - start;
 993
 994         msg = format_subject(NULL, msg, NULL);
 995         msg = skip_blank_lines(msg);
 996         c->body_off = msg - start;
 997
 998         c->commit_message_parsed = 1;
 999 }
1000
1001 static void strbuf_wrap(struct strbuf *sb, size_t pos,
1002                         size_t width, size_t indent1, size_t indent2)
1003 {
1004         struct strbuf tmp = STRBUF_INIT;
1005
1006         if (pos)
1007                 strbuf_add(&tmp, sb->buf, pos);
1008         strbuf_add_wrapped_text(&tmp, sb->buf + pos,
1009                                 cast_size_t_to_int(indent1),
1010                                 cast_size_t_to_int(indent2),
1011                                 cast_size_t_to_int(width));
1012         strbuf_swap(&tmp, sb);
1013         strbuf_release(&tmp);
1014 }
1015
1016 static void rewrap_message_tail(struct strbuf *sb,
1017                                 struct format_commit_context *c,
1018                                 size_t new_width, size_t new_indent1,
1019                                 size_t new_indent2)
1020 {
1021         if (c->width == new_width && c->indent1 == new_indent1 &&
1022             c->indent2 == new_indent2)
1023                 return;
1024         if (c->wrap_start < sb->len)
1025                 strbuf_wrap(sb, c->wrap_start, c->width, c->indent1, c->indent2);
1026         c->wrap_start = sb->len;
1027         c->width = new_width;
1028         c->indent1 = new_indent1;
1029         c->indent2 = new_indent2;
1030 }
1031
/*
 * Expand a person placeholder using the ident of the current reflog
 * entry. When there is no reflog walk, or it yields no ident, nothing
 * is appended and 2 is returned; otherwise the result of
 * format_person_part() is returned.
 */
static int format_reflog_person(struct strbuf *sb,
                                char part,
                                struct reflog_walk_info *log,
                                const struct date_mode *dmode)
{
        const char *ident = log ? get_reflog_ident(log) : NULL;

        if (!ident)
                return 2;

        return format_person_part(sb, part, ident, strlen(ident), dmode);
}
1048
1049 static size_t parse_color(struct strbuf *sb, /* in UTF-8 */
1050                           const char *placeholder,
1051                           struct format_commit_context *c)
1052 {
1053         const char *rest = placeholder;
1054         const char *basic_color = NULL;
1055
1056         if (placeholder[1] == '(') {
1057                 const char *begin = placeholder + 2;
1058                 const char *end = strchr(begin, ')');
1059                 char color[COLOR_MAXLEN];
1060
1061                 if (!end)
1062                         return 0;
1063
1064                 if (skip_prefix(begin, "auto,", &begin)) {
1065                         if (!want_color(c->pretty_ctx->color))
1066                                 return end - placeholder + 1;
1067                 } else if (skip_prefix(begin, "always,", &begin)) {
1068                         /* nothing to do; we do not respect want_color at all */
1069                 } else {
1070                         /* the default is the same as "auto" */
1071                         if (!want_color(c->pretty_ctx->color))
1072                                 return end - placeholder + 1;
1073                 }
1074
1075                 if (color_parse_mem(begin, end - begin, color) < 0)
1076                         die(_("unable to parse --pretty format"));
1077                 strbuf_addstr(sb, color);
1078                 return end - placeholder + 1;
1079         }
1080
1081         /*
1082          * We handle things like "%C(red)" above; for historical reasons, there
1083          * are a few colors that can be specified without parentheses (and
1084          * they cannot support things like "auto" or "always" at all).
1085          */
1086         if (skip_prefix(placeholder + 1, "red", &rest))
1087                 basic_color = GIT_COLOR_RED;
1088         else if (skip_prefix(placeholder + 1, "green", &rest))
1089                 basic_color = GIT_COLOR_GREEN;
1090         else if (skip_prefix(placeholder + 1, "blue", &rest))
1091                 basic_color = GIT_COLOR_BLUE;
1092         else if (skip_prefix(placeholder + 1, "reset", &rest))
1093                 basic_color = GIT_COLOR_RESET;
1094
1095         if (basic_color && want_color(c->pretty_ctx->color))
1096                 strbuf_addstr(sb, basic_color);
1097
1098         return rest - placeholder;
1099 }
1100
1101 static size_t parse_padding_placeholder(const char *placeholder,
1102                                         struct format_commit_context *c)
1103 {
1104         const char *ch = placeholder;
1105         enum flush_type flush_type;
1106         int to_column = 0;
1107
1108         switch (*ch++) {
1109         case '<':
1110                 flush_type = flush_right;
1111                 break;
1112         case '>':
1113                 if (*ch == '<') {
1114                         flush_type = flush_both;
1115                         ch++;
1116                 } else if (*ch == '>') {
1117                         flush_type = flush_left_and_steal;
1118                         ch++;
1119                 } else
1120                         flush_type = flush_left;
1121                 break;
1122         default:
1123                 return 0;
1124         }
1125
1126         /* the next value means "wide enough to that column" */
1127         if (*ch == '|') {
1128                 to_column = 1;
1129                 ch++;
1130         }
1131
1132         if (*ch == '(') {
1133                 const char *start = ch + 1;
1134                 const char *end = start + strcspn(start, ",)");
1135                 char *next;
1136                 int width;
1137                 if (!*end || end == start)
1138                         return 0;
1139                 width = strtol(start, &next, 10);
1140
1141                 /*
1142                  * We need to limit the amount of padding, or otherwise this
1143                  * would allow the user to pad the buffer by arbitrarily many
1144                  * bytes and thus cause resource exhaustion.
1145                  */
1146                 if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT)
1147                         return 0;
1148
1149                 if (next == start || width == 0)
1150                         return 0;
1151                 if (width < 0) {
1152                         if (to_column)
1153                                 width += term_columns();
1154                         if (width < 0)
1155                                 return 0;
1156                 }
1157                 c->padding = to_column ? -width : width;
1158                 c->flush_type = flush_type;
1159
1160                 if (*end == ',') {
1161                         start = end + 1;
1162                         end = strchr(start, ')');
1163                         if (!end || end == start)
1164                                 return 0;
1165                         if (starts_with(start, "trunc)"))
1166                                 c->truncate = trunc_right;
1167                         else if (starts_with(start, "ltrunc)"))
1168                                 c->truncate = trunc_left;
1169                         else if (starts_with(start, "mtrunc)"))
1170                                 c->truncate = trunc_middle;
1171                         else
1172                                 return 0;
1173                 } else
1174                         c->truncate = trunc_none;
1175
1176                 return end - placeholder + 1;
1177         }
1178         return 0;
1179 }
1180
/*
 * Try to match one placeholder argument named "candidate" at to_parse.
 *
 * The argument must be followed by ',' or ')'. When valuestart is
 * non-NULL the argument may additionally carry "=<value>", where
 * <value> runs up to the next ',' or ')'; *valuestart / *valuelen then
 * describe that value, or are set to NULL / 0 when no "=" is present.
 *
 * Returns 1 on a match and stores in *end the position after a
 * trailing ',' or, for ')', the position of the ')' itself. Returns 0
 * otherwise, leaving *end untouched.
 */
static int match_placeholder_arg_value(const char *to_parse, const char *candidate,
                                       const char **end, const char **valuestart,
                                       size_t *valuelen)
{
        const char *cursor;

        if (!skip_prefix(to_parse, candidate, &cursor))
                return 0;

        if (valuestart && *cursor == '=') {
                *valuestart = cursor + 1;
                *valuelen = strcspn(*valuestart, ",)");
                cursor = *valuestart + *valuelen;
        } else if (valuestart) {
                /* no value: the name has to end right here */
                if (*cursor != ',' && *cursor != ')')
                        return 0;
                *valuestart = NULL;
                *valuelen = 0;
        }

        switch (*cursor) {
        case ',':
                *end = cursor + 1;
                return 1;
        case ')':
                *end = cursor;
                return 1;
        default:
                return 0;
        }
}
1211
/*
 * Try to match a boolean placeholder argument named "candidate" at
 * to_parse, either bare ("name") or with a value ("name=<bool>").
 *
 * A bare argument counts as true. A value is interpreted by
 * git_parse_maybe_bool(); if that rejects it the argument does not
 * match.
 *
 * Returns 1 on a match, storing the parsed value in *val and the
 * position after the argument in *end (same convention as
 * match_placeholder_arg_value()). Returns 0 otherwise; in that case
 * neither *val nor *end is modified, so callers that pass the address
 * of their own cursor as "end" keep pointing at the offending argument.
 */
static int match_placeholder_bool_arg(const char *to_parse, const char *candidate,
                                      const char **end, int *val)
{
        const char *argval;
        const char *after;
        char *strval;
        size_t arglen;
        int v;

        /* parse into a local so a rejected value cannot move *end */
        if (!match_placeholder_arg_value(to_parse, candidate, &after, &argval, &arglen))
                return 0;

        if (!argval) {
                *val = 1;
                *end = after;
                return 1;
        }

        strval = xstrndup(argval, arglen);
        v = git_parse_maybe_bool(strval);
        free(strval);

        if (v == -1)
                return 0;

        *val = v;
        *end = after;

        return 1;
}
1239
1240 static int format_trailer_match_cb(const struct strbuf *key, void *ud)
1241 {
1242         const struct string_list *list = ud;
1243         const struct string_list_item *item;
1244
1245         for_each_string_list_item (item, list) {
1246                 if (key->len == (uintptr_t)item->util &&
1247                     !strncasecmp(item->string, key->buf, key->len))
1248                         return 1;
1249         }
1250         return 0;
1251 }
1252
1253 int format_set_trailers_options(struct process_trailer_options *opts,
1254                                 struct string_list *filter_list,
1255                                 struct strbuf *sepbuf,
1256                                 struct strbuf *kvsepbuf,
1257                                 const char **arg,
1258                                 char **invalid_arg)
1259 {
1260         for (;;) {
1261                 const char *argval;
1262                 size_t arglen;
1263
1264                 if (**arg == ')')
1265                         break;
1266
1267                 if (match_placeholder_arg_value(*arg, "key", arg, &argval, &arglen)) {
1268                         uintptr_t len = arglen;
1269
1270                         if (!argval)
1271                                 return -1;
1272
1273                         if (len && argval[len - 1] == ':')
1274                                 len--;
1275                         string_list_append(filter_list, argval)->util = (char *)len;
1276
1277                         opts->filter = format_trailer_match_cb;
1278                         opts->filter_data = filter_list;
1279                         opts->only_trailers = 1;
1280                 } else if (match_placeholder_arg_value(*arg, "separator", arg, &argval, &arglen)) {
1281                         char *fmt;
1282
1283                         strbuf_reset(sepbuf);
1284                         fmt = xstrndup(argval, arglen);
1285                         strbuf_expand(sepbuf, fmt, strbuf_expand_literal_cb, NULL);
1286                         free(fmt);
1287                         opts->separator = sepbuf;
1288                 } else if (match_placeholder_arg_value(*arg, "key_value_separator", arg, &argval, &arglen)) {
1289                         char *fmt;
1290
1291                         strbuf_reset(kvsepbuf);
1292                         fmt = xstrndup(argval, arglen);
1293                         strbuf_expand(kvsepbuf, fmt, strbuf_expand_literal_cb, NULL);
1294                         free(fmt);
1295                         opts->key_value_separator = kvsepbuf;
1296                 } else if (!match_placeholder_bool_arg(*arg, "only", arg, &opts->only_trailers) &&
1297                            !match_placeholder_bool_arg(*arg, "unfold", arg, &opts->unfold) &&
1298                            !match_placeholder_bool_arg(*arg, "keyonly", arg, &opts->key_only) &&
1299                            !match_placeholder_bool_arg(*arg, "valueonly", arg, &opts->value_only)) {
1300                         if (invalid_arg) {
1301                                 size_t len = strcspn(*arg, ",)");
1302                                 *invalid_arg = xstrndup(*arg, len);
1303                         }
1304                         return -1;
1305                 }
1306         }
1307         return 0;
1308 }
1309
/*
 * Parse the option list of a describe placeholder, starting at
 * "start", and push the matching command line options onto args.
 *
 * Known options are "tags" (boolean, pushed as --tags / --no-tags),
 * "abbrev" (integer) and "exclude" / "match" (strings), the latter
 * three pushed as --<name>=<value>. Parsing continues as long as an
 * option matches at the current position.
 *
 * Returns the number of bytes consumed from "start". Returns 0 when an
 * integer or string option has an empty value, or when an integer
 * value is not consumed in full by strtol().
 */
static size_t parse_describe_args(const char *start, struct strvec *args)
{
        struct {
                char *name;
                enum {
                        DESCRIBE_ARG_BOOL,
                        DESCRIBE_ARG_INTEGER,
                        DESCRIBE_ARG_STRING,
                } type;
        }  option[] = {
                { "tags", DESCRIBE_ARG_BOOL},
                { "abbrev", DESCRIBE_ARG_INTEGER },
                { "exclude", DESCRIBE_ARG_STRING },
                { "match", DESCRIBE_ARG_STRING },
        };
        const char *arg = start;
        int matched;

        do {
                const char *value;
                size_t value_len = 0;
                int enabled = 0;
                int idx;

                matched = 0;
                /* try each known option at the current position */
                for (idx = 0; idx < ARRAY_SIZE(option) && !matched; idx++) {
                        switch (option[idx].type) {
                        case DESCRIBE_ARG_BOOL:
                                if (!match_placeholder_bool_arg(arg, option[idx].name, &arg, &enabled))
                                        break;
                                if (enabled)
                                        strvec_pushf(args, "--%s", option[idx].name);
                                else
                                        strvec_pushf(args, "--no-%s", option[idx].name);
                                matched = 1;
                                break;
                        case DESCRIBE_ARG_INTEGER: {
                                char *endptr;

                                if (!match_placeholder_arg_value(arg, option[idx].name, &arg,
                                                                 &value, &value_len))
                                        break;
                                if (!value_len)
                                        return 0;
                                /* only used to check that the value is all number */
                                strtol(value, &endptr, 10);
                                if (endptr - value != value_len)
                                        return 0;
                                strvec_pushf(args, "--%s=%.*s", option[idx].name, (int)value_len, value);
                                matched = 1;
                                break;
                        }
                        case DESCRIBE_ARG_STRING:
                                if (!match_placeholder_arg_value(arg, option[idx].name, &arg,
                                                                 &value, &value_len))
                                        break;
                                if (!value_len)
                                        return 0;
                                strvec_pushf(args, "--%s=%.*s", option[idx].name, (int)value_len, value);
                                matched = 1;
                                break;
                        }
                }
        } while (matched);

        return arg - start;
}
1375
1376 static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
1377                                 const char *placeholder,
1378                                 void *context)
1379 {
1380         struct format_commit_context *c = context;
1381         const struct commit *commit = c->commit;
1382         const char *msg = c->message;
1383         struct commit_list *p;
1384         const char *arg, *eol;
1385         size_t res;
1386         char **slot;
1387
1388         /* these are independent of the commit */
1389         res = strbuf_expand_literal_cb(sb, placeholder, NULL);
1390         if (res)
1391                 return res;
1392
1393         switch (placeholder[0]) {
1394         case 'C':
1395                 if (starts_with(placeholder + 1, "(auto)")) {
1396                         c->auto_color = want_color(c->pretty_ctx->color);
1397                         if (c->auto_color && sb->len)
1398                                 strbuf_addstr(sb, GIT_COLOR_RESET);
1399                         return 7; /* consumed 7 bytes, "C(auto)" */
1400                 } else {
1401                         int ret = parse_color(sb, placeholder, c);
1402                         if (ret)
1403                                 c->auto_color = 0;
1404                         /*
1405                          * Otherwise, we decided to treat %C<unknown>
1406                          * as a literal string, and the previous
1407                          * %C(auto) is still valid.
1408                          */
1409                         return ret;
1410                 }
1411         case 'w':
1412                 if (placeholder[1] == '(') {
1413                         unsigned long width = 0, indent1 = 0, indent2 = 0;
1414                         char *next;
1415                         const char *start = placeholder + 2;
1416                         const char *end = strchr(start, ')');
1417                         if (!end)
1418                                 return 0;
1419                         if (end > start) {
1420                                 width = strtoul(start, &next, 10);
1421                                 if (*next == ',') {
1422                                         indent1 = strtoul(next + 1, &next, 10);
1423                                         if (*next == ',') {
1424                                                 indent2 = strtoul(next + 1,
1425                                                                  &next, 10);
1426                                         }
1427                                 }
1428                                 if (*next != ')')
1429                                         return 0;
1430                         }
1431
1432                         /*
1433                          * We need to limit the format here as it allows the
1434                          * user to prepend arbitrarily many bytes to the buffer
1435                          * when rewrapping.
1436                          */
1437                         if (width > FORMATTING_LIMIT ||
1438                             indent1 > FORMATTING_LIMIT ||
1439                             indent2 > FORMATTING_LIMIT)
1440                                 return 0;
1441                         rewrap_message_tail(sb, c, width, indent1, indent2);
1442                         return end - placeholder + 1;
1443                 } else
1444                         return 0;
1445
1446         case '<':
1447         case '>':
1448                 return parse_padding_placeholder(placeholder, c);
1449         }
1450
1451         if (skip_prefix(placeholder, "(describe", &arg)) {
1452                 struct child_process cmd = CHILD_PROCESS_INIT;
1453                 struct strbuf out = STRBUF_INIT;
1454                 struct strbuf err = STRBUF_INIT;
1455                 struct pretty_print_describe_status *describe_status;
1456
1457                 describe_status = c->pretty_ctx->describe_status;
1458                 if (describe_status) {
1459                         if (!describe_status->max_invocations)
1460                                 return 0;
1461                         describe_status->max_invocations--;
1462                 }
1463
1464                 cmd.git_cmd = 1;
1465                 strvec_push(&cmd.args, "describe");
1466
1467                 if (*arg == ':') {
1468                         arg++;
1469                         arg += parse_describe_args(arg, &cmd.args);
1470                 }
1471
1472                 if (*arg != ')') {
1473                         child_process_clear(&cmd);
1474                         return 0;
1475                 }
1476
1477                 strvec_push(&cmd.args, oid_to_hex(&commit->object.oid));
1478                 pipe_command(&cmd, NULL, 0, &out, 0, &err, 0);
1479                 strbuf_rtrim(&out);
1480                 strbuf_addbuf(sb, &out);
1481                 strbuf_release(&out);
1482                 strbuf_release(&err);
1483                 return arg - placeholder + 1;
1484         }
1485
1486         /* these depend on the commit */
1487         if (!commit->object.parsed)
1488                 parse_object(the_repository, &commit->object.oid);
1489
1490         switch (placeholder[0]) {
1491         case 'H':               /* commit hash */
1492                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
1493                 strbuf_addstr(sb, oid_to_hex(&commit->object.oid));
1494                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
1495                 return 1;
1496         case 'h':               /* abbreviated commit hash */
1497                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT));
1498                 strbuf_add_unique_abbrev(sb, &commit->object.oid,
1499                                          c->pretty_ctx->abbrev);
1500                 strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
1501                 return 1;
1502         case 'T':               /* tree hash */
1503                 strbuf_addstr(sb, oid_to_hex(get_commit_tree_oid(commit)));
1504                 return 1;
1505         case 't':               /* abbreviated tree hash */
1506                 strbuf_add_unique_abbrev(sb,
1507                                          get_commit_tree_oid(commit),
1508                                          c->pretty_ctx->abbrev);
1509                 return 1;
1510         case 'P':               /* parent hashes */
1511                 for (p = commit->parents; p; p = p->next) {
1512                         if (p != commit->parents)
1513                                 strbuf_addch(sb, ' ');
1514                         strbuf_addstr(sb, oid_to_hex(&p->item->object.oid));
1515                 }
1516                 return 1;
1517         case 'p':               /* abbreviated parent hashes */
1518                 for (p = commit->parents; p; p = p->next) {
1519                         if (p != commit->parents)
1520                                 strbuf_addch(sb, ' ');
1521                         strbuf_add_unique_abbrev(sb, &p->item->object.oid,
1522                                                  c->pretty_ctx->abbrev);
1523                 }
1524                 return 1;
1525         case 'm':               /* left/right/bottom */
1526                 strbuf_addstr(sb, get_revision_mark(NULL, commit));
1527                 return 1;
1528         case 'd':
1529                 format_decorations(sb, commit, c->auto_color);
1530                 return 1;
1531         case 'D':
1532                 format_decorations_extended(sb, commit, c->auto_color, "", ", ", "");
1533                 return 1;
1534         case 'S':               /* tag/branch like --source */
1535                 if (!(c->pretty_ctx->rev && c->pretty_ctx->rev->sources))
1536                         return 0;
1537                 slot = revision_sources_at(c->pretty_ctx->rev->sources, commit);
1538                 if (!(slot && *slot))
1539                         return 0;
1540                 strbuf_addstr(sb, *slot);
1541                 return 1;
1542         case 'g':               /* reflog info */
1543                 switch(placeholder[1]) {
1544                 case 'd':       /* reflog selector */
1545                 case 'D':
1546                         if (c->pretty_ctx->reflog_info)
1547                                 get_reflog_selector(sb,
1548                                                     c->pretty_ctx->reflog_info,
1549                                                     &c->pretty_ctx->date_mode,
1550                                                     c->pretty_ctx->date_mode_explicit,
1551                                                     (placeholder[1] == 'd'));
1552                         return 2;
1553                 case 's':       /* reflog message */
1554                         if (c->pretty_ctx->reflog_info)
1555                                 get_reflog_message(sb, c->pretty_ctx->reflog_info);
1556                         return 2;
1557                 case 'n':
1558                 case 'N':
1559                 case 'e':
1560                 case 'E':
1561                         return format_reflog_person(sb,
1562                                                     placeholder[1],
1563                                                     c->pretty_ctx->reflog_info,
1564                                                     &c->pretty_ctx->date_mode);
1565                 }
1566                 return 0;       /* unknown %g placeholder */
1567         case 'N':
1568                 if (c->pretty_ctx->notes_message) {
1569                         strbuf_addstr(sb, c->pretty_ctx->notes_message);
1570                         return 1;
1571                 }
1572                 return 0;
1573         }
1574
1575         if (placeholder[0] == 'G') {
1576                 if (!c->signature_check.result)
1577                         check_commit_signature(c->commit, &(c->signature_check));
1578                 switch (placeholder[1]) {
1579                 case 'G':
1580                         if (c->signature_check.output)
1581                                 strbuf_addstr(sb, c->signature_check.output);
1582                         break;
1583                 case '?':
1584                         switch (c->signature_check.result) {
1585                         case 'G':
1586                                 switch (c->signature_check.trust_level) {
1587                                 case TRUST_UNDEFINED:
1588                                 case TRUST_NEVER:
1589                                         strbuf_addch(sb, 'U');
1590                                         break;
1591                                 default:
1592                                         strbuf_addch(sb, 'G');
1593                                         break;
1594                                 }
1595                                 break;
1596                         case 'B':
1597                         case 'E':
1598                         case 'N':
1599                         case 'X':
1600                         case 'Y':
1601                         case 'R':
1602                                 strbuf_addch(sb, c->signature_check.result);
1603                         }
1604                         break;
1605                 case 'S':
1606                         if (c->signature_check.signer)
1607                                 strbuf_addstr(sb, c->signature_check.signer);
1608                         break;
1609                 case 'K':
1610                         if (c->signature_check.key)
1611                                 strbuf_addstr(sb, c->signature_check.key);
1612                         break;
1613                 case 'F':
1614                         if (c->signature_check.fingerprint)
1615                                 strbuf_addstr(sb, c->signature_check.fingerprint);
1616                         break;
1617                 case 'P':
1618                         if (c->signature_check.primary_key_fingerprint)
1619                                 strbuf_addstr(sb, c->signature_check.primary_key_fingerprint);
1620                         break;
1621                 case 'T':
1622                         strbuf_addstr(sb, gpg_trust_level_to_str(c->signature_check.trust_level));
1623                         break;
1624                 default:
1625                         return 0;
1626                 }
1627                 return 2;
1628         }
1629
1630         /* For the rest we have to parse the commit header. */
1631         if (!c->commit_header_parsed) {
1632                 msg = c->message =
1633                         repo_logmsg_reencode(c->repository, commit,
1634                                              &c->commit_encoding, "UTF-8");
1635                 parse_commit_header(c);
1636         }
1637
1638         switch (placeholder[0]) {
1639         case 'a':       /* author ... */
1640                 return format_person_part(sb, placeholder[1],
1641                                    msg + c->author.off, c->author.len,
1642                                    &c->pretty_ctx->date_mode);
1643         case 'c':       /* committer ... */
1644                 return format_person_part(sb, placeholder[1],
1645                                    msg + c->committer.off, c->committer.len,
1646                                    &c->pretty_ctx->date_mode);
1647         case 'e':       /* encoding */
1648                 if (c->commit_encoding)
1649                         strbuf_addstr(sb, c->commit_encoding);
1650                 return 1;
1651         case 'B':       /* raw body */
1652                 /* message_off is always left at the initial newline */
1653                 strbuf_addstr(sb, msg + c->message_off + 1);
1654                 return 1;
1655         }
1656
1657         /* Now we need to parse the commit message. */
1658         if (!c->commit_message_parsed)
1659                 parse_commit_message(c);
1660
1661         switch (placeholder[0]) {
1662         case 's':       /* subject */
1663                 format_subject(sb, msg + c->subject_off, " ");
1664                 return 1;
1665         case 'f':       /* sanitized subject */
1666                 eol = strchrnul(msg + c->subject_off, '\n');
1667                 format_sanitized_subject(sb, msg + c->subject_off, eol - (msg + c->subject_off));
1668                 return 1;
1669         case 'b':       /* body */
1670                 strbuf_addstr(sb, msg + c->body_off);
1671                 return 1;
1672         }
1673
1674         if (skip_prefix(placeholder, "(trailers", &arg)) {
1675                 struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
1676                 struct string_list filter_list = STRING_LIST_INIT_NODUP;
1677                 struct strbuf sepbuf = STRBUF_INIT;
1678                 struct strbuf kvsepbuf = STRBUF_INIT;
1679                 size_t ret = 0;
1680
1681                 opts.no_divider = 1;
1682
1683                 if (*arg == ':') {
1684                         arg++;
1685                         if (format_set_trailers_options(&opts, &filter_list, &sepbuf, &kvsepbuf, &arg, NULL))
1686                                 goto trailer_out;
1687                 }
1688                 if (*arg == ')') {
1689                         format_trailers_from_commit(sb, msg + c->subject_off, &opts);
1690                         ret = arg - placeholder + 1;
1691                 }
1692         trailer_out:
1693                 string_list_clear(&filter_list, 0);
1694                 strbuf_release(&sepbuf);
1695                 return ret;
1696         }
1697
1698         return 0;       /* unknown placeholder */
1699 }
1700
/*
 * Expand one placeholder into a scratch buffer and append it to "sb",
 * padded or truncated according to the pending padding request stored
 * in "c" (c->padding, c->flush_type, c->truncate).
 *
 * Color placeholders ("%C...") that directly precede the real placeholder
 * are expanded into the same scratch buffer, so the bytes they emit travel
 * together with the padded text.
 *
 * Returns the number of bytes of "placeholder" that were consumed, and
 * resets c->flush_type to no_flush so the request applies only once.
 */
static size_t format_and_pad_commit(struct strbuf *sb, /* in UTF-8 */
                                    const char *placeholder,
                                    struct format_commit_context *c)
{
        struct strbuf local_sb = STRBUF_INIT;
        size_t total_consumed = 0;
        int len, padding = c->padding;

        /*
         * A negative padding is turned into a width relative to what is
         * already on the current output line: its absolute value minus the
         * width of the last line of "sb" and the graph width.
         */
        if (padding < 0) {
                const char *start = strrchr(sb->buf, '\n');
                int occupied;
                if (!start)
                        start = sb->buf;
                /* NOTE(review): the third argument presumably asks to skip ANSI escapes -- confirm */
                occupied = utf8_strnwidth(start, strlen(start), 1);
                occupied += c->pretty_ctx->graph_width;
                padding = (-padding) - occupied;
        }
        /*
         * Expand the placeholder.  As long as what we just expanded was a
         * 'C' placeholder and another '%' follows immediately, keep going
         * so that the next placeholder lands in the same scratch buffer.
         */
        while (1) {
                int modifier = *placeholder == 'C';
                size_t consumed = format_commit_one(&local_sb, placeholder, c);
                total_consumed += consumed;

                if (!modifier)
                        break;

                placeholder += consumed;
                if (*placeholder != '%')
                        break;
                /* account for the '%' that introduces the next placeholder */
                placeholder++;
                total_consumed++;
        }
        /* display width (in columns) of what was expanded */
        len = utf8_strnwidth(local_sb.buf, local_sb.len, 1);

        if (c->flush_type == flush_left_and_steal) {
                /*
                 * The text does not fit: walk backwards over the tail of
                 * "sb", trading each trailing space for one more column of
                 * padding until the text fits or something other than a
                 * space or a display-mode escape sequence is found.
                 */
                const char *ch = sb->buf + sb->len - 1;
                while (len > padding && ch > sb->buf) {
                        const char *p;
                        if (*ch == ' ') {
                                ch--;
                                padding++;
                                continue;
                        }
                        /* check for trailing ansi sequences */
                        if (*ch != 'm')
                                break;
                        p = ch - 1;
                        /* look back a bounded distance for the ESC that starts it */
                        while (p > sb->buf && ch - p < 10 && *p != '\033')
                                p--;
                        if (*p != '\033' ||
                            ch + 1 - p != display_mode_esc_sequence_len(p))
                                break;
                        /*
                         * got a good ansi sequence, put it back to
                         * local_sb as we're cutting sb
                         */
                        strbuf_insert(&local_sb, 0, p, ch + 1 - p);
                        ch = p - 1;
                }
                /* drop everything that was stolen from the tail of "sb" */
                strbuf_setlen(sb, ch + 1 - sb->buf);
                c->flush_type = flush_left;
        }

        if (len > padding) {
                /*
                 * Too wide: replace part of the text with ".." at the
                 * position selected by c->truncate, or leave it alone for
                 * trunc_none.
                 */
                switch (c->truncate) {
                case trunc_left:
                        strbuf_utf8_replace(&local_sb,
                                            0, len - (padding - 2),
                                            "..");
                        break;
                case trunc_middle:
                        strbuf_utf8_replace(&local_sb,
                                            padding / 2 - 1,
                                            len - (padding - 2),
                                            "..");
                        break;
                case trunc_right:
                        strbuf_utf8_replace(&local_sb,
                                            padding - 2, len - (padding - 2),
                                            "..");
                        break;
                case trunc_none:
                        break;
                }
                strbuf_addbuf(sb, &local_sb);
        } else {
                /*
                 * Fits: append a run of spaces and copy the text into it at
                 * an offset chosen by the flush type (0 unless flush_left
                 * or flush_both applies).
                 */
                size_t sb_len = sb->len, offset = 0;
                if (c->flush_type == flush_left)
                        offset = padding - len;
                else if (c->flush_type == flush_both)
                        offset = (padding - len) / 2;
                /*
                 * we calculate padding in columns, now
                 * convert it back to chars
                 */
                padding = padding - len + local_sb.len;
                strbuf_addchars(sb, ' ', padding);
                memcpy(sb->buf + sb_len + offset, local_sb.buf,
                       local_sb.len);
        }
        strbuf_release(&local_sb);
        c->flush_type = no_flush;
        return total_consumed;
}
1804
1805 static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
1806                                  const char *placeholder,
1807                                  void *context)
1808 {
1809         size_t consumed, orig_len;
1810         enum {
1811                 NO_MAGIC,
1812                 ADD_LF_BEFORE_NON_EMPTY,
1813                 DEL_LF_BEFORE_EMPTY,
1814                 ADD_SP_BEFORE_NON_EMPTY
1815         } magic = NO_MAGIC;
1816
1817         switch (placeholder[0]) {
1818         case '-':
1819                 magic = DEL_LF_BEFORE_EMPTY;
1820                 break;
1821         case '+':
1822                 magic = ADD_LF_BEFORE_NON_EMPTY;
1823                 break;
1824         case ' ':
1825                 magic = ADD_SP_BEFORE_NON_EMPTY;
1826                 break;
1827         default:
1828                 break;
1829         }
1830         if (magic != NO_MAGIC) {
1831                 placeholder++;
1832
1833                 switch (placeholder[0]) {
1834                 case 'w':
1835                         /*
1836                          * `%+w()` cannot ever expand to a non-empty string,
1837                          * and it potentially changes the layout of preceding
1838                          * contents. We're thus not able to handle the magic in
1839                          * this combination and refuse the pattern.
1840                          */
1841                         return 0;
1842                 };
1843         }
1844
1845         orig_len = sb->len;
1846         if (((struct format_commit_context *)context)->flush_type != no_flush)
1847                 consumed = format_and_pad_commit(sb, placeholder, context);
1848         else
1849                 consumed = format_commit_one(sb, placeholder, context);
1850         if (magic == NO_MAGIC)
1851                 return consumed;
1852
1853         if ((orig_len == sb->len) && magic == DEL_LF_BEFORE_EMPTY) {
1854                 while (sb->len && sb->buf[sb->len - 1] == '\n')
1855                         strbuf_setlen(sb, sb->len - 1);
1856         } else if (orig_len != sb->len) {
1857                 if (magic == ADD_LF_BEFORE_NON_EMPTY)
1858                         strbuf_insertstr(sb, orig_len, "\n");
1859                 else if (magic == ADD_SP_BEFORE_NON_EMPTY)
1860                         strbuf_insertstr(sb, orig_len, " ");
1861         }
1862         return consumed + 1;
1863 }
1864
1865 static size_t userformat_want_item(struct strbuf *sb UNUSED,
1866                                    const char *placeholder,
1867                                    void *context)
1868 {
1869         struct userformat_want *w = context;
1870
1871         if (*placeholder == '+' || *placeholder == '-' || *placeholder == ' ')
1872                 placeholder++;
1873
1874         switch (*placeholder) {
1875         case 'N':
1876                 w->notes = 1;
1877                 break;
1878         case 'S':
1879                 w->source = 1;
1880                 break;
1881         case 'd':
1882         case 'D':
1883                 w->decorate = 1;
1884                 break;
1885         }
1886         return 0;
1887 }
1888
1889 void userformat_find_requirements(const char *fmt, struct userformat_want *w)
1890 {
1891         struct strbuf dummy = STRBUF_INIT;
1892
1893         if (!fmt) {
1894                 if (!user_format)
1895                         return;
1896                 fmt = user_format;
1897         }
1898         strbuf_expand(&dummy, fmt, userformat_want_item, w);
1899         strbuf_release(&dummy);
1900 }
1901
1902 void repo_format_commit_message(struct repository *r,
1903                                 const struct commit *commit,
1904                                 const char *format, struct strbuf *sb,
1905                                 const struct pretty_print_context *pretty_ctx)
1906 {
1907         struct format_commit_context context = {
1908                 .repository = r,
1909                 .commit = commit,
1910                 .pretty_ctx = pretty_ctx,
1911                 .wrap_start = sb->len
1912         };
1913         const char *output_enc = pretty_ctx->output_encoding;
1914         const char *utf8 = "UTF-8";
1915
1916         strbuf_expand(sb, format, format_commit_item, &context);
1917         rewrap_message_tail(sb, &context, 0, 0, 0);
1918
1919         /*
1920          * Convert output to an actual output encoding; note that
1921          * format_commit_item() will always use UTF-8, so we don't
1922          * have to bother if that's what the output wants.
1923          */
1924         if (output_enc) {
1925                 if (same_encoding(utf8, output_enc))
1926                         output_enc = NULL;
1927         } else {
1928                 if (context.commit_encoding &&
1929                     !same_encoding(context.commit_encoding, utf8))
1930                         output_enc = context.commit_encoding;
1931         }
1932
1933         if (output_enc) {
1934                 size_t outsz;
1935                 char *out = reencode_string_len(sb->buf, sb->len,
1936                                                 output_enc, utf8, &outsz);
1937                 if (out)
1938                         strbuf_attach(sb, out, outsz, outsz + 1);
1939         }
1940
1941         free(context.commit_encoding);
1942         repo_unuse_commit_buffer(r, commit, context.message);
1943 }
1944
1945 static void pp_header(struct pretty_print_context *pp,
1946                       const char *encoding,
1947                       const struct commit *commit,
1948                       const char **msg_p,
1949                       struct strbuf *sb)
1950 {
1951         int parents_shown = 0;
1952
1953         for (;;) {
1954                 const char *name, *line = *msg_p;
1955                 int linelen = get_one_line(*msg_p);
1956
1957                 if (!linelen)
1958                         return;
1959                 *msg_p += linelen;
1960
1961                 if (linelen == 1)
1962                         /* End of header */
1963                         return;
1964
1965                 if (pp->fmt == CMIT_FMT_RAW) {
1966                         strbuf_add(sb, line, linelen);
1967                         continue;
1968                 }
1969
1970                 if (starts_with(line, "parent ")) {
1971                         if (linelen != the_hash_algo->hexsz + 8)
1972                                 die("bad parent line in commit");
1973                         continue;
1974                 }
1975
1976                 if (!parents_shown) {
1977                         unsigned num = commit_list_count(commit->parents);
1978                         /* with enough slop */
1979                         strbuf_grow(sb, num * (GIT_MAX_HEXSZ + 10) + 20);
1980                         add_merge_info(pp, sb, commit);
1981                         parents_shown = 1;
1982                 }
1983
1984                 /*
1985                  * MEDIUM == DEFAULT shows only author with dates.
1986                  * FULL shows both authors but not dates.
1987                  * FULLER shows both authors and dates.
1988                  */
1989                 if (skip_prefix(line, "author ", &name)) {
1990                         strbuf_grow(sb, linelen + 80);
1991                         pp_user_info(pp, "Author", sb, name, encoding);
1992                 }
1993                 if (skip_prefix(line, "committer ", &name) &&
1994                     (pp->fmt == CMIT_FMT_FULL || pp->fmt == CMIT_FMT_FULLER)) {
1995                         strbuf_grow(sb, linelen + 80);
1996                         pp_user_info(pp, "Commit", sb, name, encoding);
1997                 }
1998         }
1999 }
2000
2001 void pp_title_line(struct pretty_print_context *pp,
2002                    const char **msg_p,
2003                    struct strbuf *sb,
2004                    const char *encoding,
2005                    int need_8bit_cte)
2006 {
2007         static const int max_length = 78; /* per rfc2047 */
2008         struct strbuf title;
2009
2010         strbuf_init(&title, 80);
2011         *msg_p = format_subject(&title, *msg_p,
2012                                 pp->preserve_subject ? "\n" : " ");
2013
2014         strbuf_grow(sb, title.len + 1024);
2015         if (pp->print_email_subject) {
2016                 if (pp->rev)
2017                         fmt_output_email_subject(sb, pp->rev);
2018                 if (pp->encode_email_headers &&
2019                     needs_rfc2047_encoding(title.buf, title.len))
2020                         add_rfc2047(sb, title.buf, title.len,
2021                                                 encoding, RFC2047_SUBJECT);
2022                 else
2023                         strbuf_add_wrapped_bytes(sb, title.buf, title.len,
2024                                          -last_line_length(sb), 1, max_length);
2025         } else {
2026                 strbuf_addbuf(sb, &title);
2027         }
2028         strbuf_addch(sb, '\n');
2029
2030         if (need_8bit_cte == 0) {
2031                 int i;
2032                 for (i = 0; i < pp->in_body_headers.nr; i++) {
2033                         if (has_non_ascii(pp->in_body_headers.items[i].string)) {
2034                                 need_8bit_cte = 1;
2035                                 break;
2036                         }
2037                 }
2038         }
2039
2040         if (need_8bit_cte > 0) {
2041                 const char *header_fmt =
2042                         "MIME-Version: 1.0\n"
2043                         "Content-Type: text/plain; charset=%s\n"
2044                         "Content-Transfer-Encoding: 8bit\n";
2045                 strbuf_addf(sb, header_fmt, encoding);
2046         }
2047         if (pp->after_subject) {
2048                 strbuf_addstr(sb, pp->after_subject);
2049         }
2050         if (cmit_fmt_is_mail(pp->fmt)) {
2051                 strbuf_addch(sb, '\n');
2052         }
2053
2054         if (pp->in_body_headers.nr) {
2055                 int i;
2056                 for (i = 0; i < pp->in_body_headers.nr; i++) {
2057                         strbuf_addstr(sb, pp->in_body_headers.items[i].string);
2058                         free(pp->in_body_headers.items[i].string);
2059                 }
2060                 string_list_clear(&pp->in_body_headers, 0);
2061                 strbuf_addch(sb, '\n');
2062         }
2063
2064         strbuf_release(&title);
2065 }
2066
/*
 * Sum the widths reported by utf8_width() for the bytes in [start, end).
 * Returns -1 as soon as utf8_width() reports a negative width or resets
 * the cursor to NULL.
 */
static int pp_utf8_width(const char *start, const char *end)
{
        size_t left = end - start;
        int total = 0;

        while (left > 0) {
                int cols = utf8_width(&start, &left);

                if (cols < 0 || start == NULL)
                        return -1;
                total += cols;
        }
        return total;
}
2080
2081 static void strbuf_add_tabexpand(struct strbuf *sb, struct grep_opt *opt,
2082                                  int color, int tabwidth, const char *line,
2083                                  int linelen)
2084 {
2085         const char *tab;
2086
2087         while ((tab = memchr(line, '\t', linelen)) != NULL) {
2088                 int width = pp_utf8_width(line, tab);
2089
2090                 /*
2091                  * If it wasn't well-formed utf8, or it
2092                  * had characters with badly defined
2093                  * width (control characters etc), just
2094                  * give up on trying to align things.
2095                  */
2096                 if (width < 0)
2097                         break;
2098
2099                 /* Output the data .. */
2100                 append_line_with_color(sb, opt, line, tab - line, color,
2101                                        GREP_CONTEXT_BODY,
2102                                        GREP_HEADER_FIELD_MAX);
2103
2104                 /* .. and the de-tabified tab */
2105                 strbuf_addchars(sb, ' ', tabwidth - (width % tabwidth));
2106
2107                 /* Skip over the printed part .. */
2108                 linelen -= tab + 1 - line;
2109                 line = tab + 1;
2110         }
2111
2112         /*
2113          * Print out everything after the last tab without
2114          * worrying about width - there's nothing more to
2115          * align.
2116          */
2117         append_line_with_color(sb, opt, line, linelen, color, GREP_CONTEXT_BODY,
2118                                GREP_HEADER_FIELD_MAX);
2119 }
2120
2121 /*
2122  * pp_handle_indent() prints out the intendation, and
2123  * the whole line (without the final newline), after
2124  * de-tabifying.
2125  */
2126 static void pp_handle_indent(struct pretty_print_context *pp,
2127                              struct strbuf *sb, int indent,
2128                              const char *line, int linelen)
2129 {
2130         struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
2131
2132         strbuf_addchars(sb, ' ', indent);
2133         if (pp->expand_tabs_in_log)
2134                 strbuf_add_tabexpand(sb, opt, pp->color, pp->expand_tabs_in_log,
2135                                      line, linelen);
2136         else
2137                 append_line_with_color(sb, opt, line, linelen, pp->color,
2138                                        GREP_CONTEXT_BODY,
2139                                        GREP_HEADER_FIELD_MAX);
2140 }
2141
/*
 * Return 1 if "line" consists of zero or more '>' characters followed by
 * "From " and "len" is greater than 4, 0 otherwise.
 */
static int is_mboxrd_from(const char *line, int len)
{
        const char *after_quotes;

        /*
         * a line matching /^From $/ here would only have len == 4
         * at this point because is_empty_line would've trimmed all
         * trailing space
         */
        if (len <= 4)
                return 0;

        after_quotes = line + strspn(line, ">");
        return !!starts_with(after_quotes, "From ");
}
2151
2152 void pp_remainder(struct pretty_print_context *pp,
2153                   const char **msg_p,
2154                   struct strbuf *sb,
2155                   int indent)
2156 {
2157         struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
2158         int first = 1;
2159
2160         for (;;) {
2161                 const char *line = *msg_p;
2162                 int linelen = get_one_line(line);
2163                 *msg_p += linelen;
2164
2165                 if (!linelen)
2166                         break;
2167
2168                 if (is_blank_line(line, &linelen)) {
2169                         if (first)
2170                                 continue;
2171                         if (pp->fmt == CMIT_FMT_SHORT)
2172                                 break;
2173                 }
2174                 first = 0;
2175
2176                 strbuf_grow(sb, linelen + indent + 20);
2177                 if (indent)
2178                         pp_handle_indent(pp, sb, indent, line, linelen);
2179                 else if (pp->expand_tabs_in_log)
2180                         strbuf_add_tabexpand(sb, opt, pp->color,
2181                                              pp->expand_tabs_in_log, line,
2182                                              linelen);
2183                 else {
2184                         if (pp->fmt == CMIT_FMT_MBOXRD &&
2185                                         is_mboxrd_from(line, linelen))
2186                                 strbuf_addch(sb, '>');
2187
2188                         append_line_with_color(sb, opt, line, linelen,
2189                                                pp->color, GREP_CONTEXT_BODY,
2190                                                GREP_HEADER_FIELD_MAX);
2191                 }
2192                 strbuf_addch(sb, '\n');
2193         }
2194 }
2195
/*
 * Append the pretty-printed form of "commit" to "sb" in the format
 * selected by pp->fmt.
 *
 * CMIT_FMT_USERFORMAT is delegated to repo_format_commit_message() with
 * the saved user_format.  For every other format the commit message is
 * obtained in the log output encoding and emitted as header, optional
 * title line and body.
 */
void pretty_print_commit(struct pretty_print_context *pp,
                         const struct commit *commit,
                         struct strbuf *sb)
{
        unsigned long beginning_of_body;
        int indent = 4;
        const char *msg;
        const char *reencoded;
        const char *encoding;
        int need_8bit_cte = pp->need_8bit_cte;

        if (pp->fmt == CMIT_FMT_USERFORMAT) {
                repo_format_commit_message(the_repository, commit,
                                           user_format, sb, pp);
                return;
        }

        encoding = get_log_output_encoding();
        /*
         * "msg" is advanced while parsing below; "reencoded" keeps the
         * original pointer so it can be handed back at the end.
         */
        msg = reencoded = repo_logmsg_reencode(the_repository, commit, NULL,
                                               encoding);

        /* the oneline and mail formats do not indent the message */
        if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt))
                indent = 0;

        /*
         * We need to check and emit Content-type: to mark it
         * as 8-bit if we haven't done so.
         */
        if (cmit_fmt_is_mail(pp->fmt) && need_8bit_cte == 0) {
                int i, ch, in_body;

                for (in_body = i = 0; (ch = msg[i]); i++) {
                        if (!in_body) {
                                /*
                                 * Header lines (e.g. the author) may hold
                                 * non 7-bit ASCII bytes even when the log
                                 * message does not; only the body is
                                 * inspected here, so skip over the header
                                 * part (up to the first empty line) first.
                                 */
                                if (ch == '\n' && msg[i+1] == '\n')
                                        in_body = 1;
                        }
                        else if (non_ascii(ch)) {
                                need_8bit_cte = 1;
                                break;
                        }
                }
        }

        /* emit the header; this moves "msg" past the header lines */
        pp_header(pp, encoding, commit, &msg, sb);
        if (pp->fmt != CMIT_FMT_ONELINE && !pp->print_email_subject) {
                strbuf_addch(sb, '\n');
        }

        /* Skip excess blank lines at the beginning of body, if any... */
        msg = skip_blank_lines(msg);

        /* These formats treat the title line specially. */
        if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt))
                pp_title_line(pp, &msg, sb, encoding, need_8bit_cte);

        /* remember where the body starts so over-trimming can be detected below */
        beginning_of_body = sb->len;
        if (pp->fmt != CMIT_FMT_ONELINE)
                pp_remainder(pp, &msg, sb, indent);
        strbuf_rtrim(sb);

        /* Make sure there is an EOLN for the non-oneline case */
        if (pp->fmt != CMIT_FMT_ONELINE)
                strbuf_addch(sb, '\n');

        /*
         * The caller may append additional body text in e-mail
         * format.  Make sure we did not strip the blank line
         * between the header and the body.
         */
        if (cmit_fmt_is_mail(pp->fmt) && sb->len <= beginning_of_body)
                strbuf_addch(sb, '\n');

        repo_unuse_commit_buffer(the_repository, commit, reencoded);
}
2274
2275 void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,
2276                     struct strbuf *sb)
2277 {
2278         struct pretty_print_context pp = {0};
2279         pp.fmt = fmt;
2280         pretty_print_commit(&pp, commit, sb);
2281 }