src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2012  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
  46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or the value of the pD or pI or pMd (sans "d") macros.
  74    Also, %% in a format stands for a single % in the output.  A % that
  75    does not introduce a valid %-sequence causes undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
  89    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
  90    value of the pMd etc. macros means to use intmax_t or uintmax_t,
  91    and the empty length modifier means `int' or `unsigned int'.
  92
  93    The width specifier supplies a lower limit for the length of the printed
  94    representation.  The padding, if any, normally goes on the left, but it goes
  95    on the right if the - flag is present.  The padding character is normally a
  96    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  97    The - flag takes precedence over the 0 flag.
  98
  99    For %e, %f, and %g sequences, the number after the "." in the precision
 100    specifier says how many decimal places to show; if zero, the decimal point
 101    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 102
 103 #include <config.h>
 104 #include <stdio.h>
 105 #include <setjmp.h>
 106 #include <float.h>
 107 #include <unistd.h>
 108 #include <limits.h>
 109
 110 #include "lisp.h"
 111
 112 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 113    don't have to include others because CHAR_HEAD_P does not contain
 114    another macro.  */
 115 #include "character.h"
 116
 117 #ifndef DBL_MAX_10_EXP
 118 #define DBL_MAX_10_EXP 308 /* IEEE double */
 119 #endif
 120
 121 /* Generate output from a format-spec FORMAT,
 122    terminated at position FORMAT_END.
 123    (*FORMAT_END is not part of the format, but must exist and be readable.)
 124    Output goes in BUFFER, which has room for BUFSIZE chars.
 125    BUFSIZE must be positive.  If the output does not fit, truncate it
 126    to fit and return BUFSIZE - 1; if this truncates a multibyte
 127    sequence, store '\0' into the sequence's first byte.
 128    Returns the number of bytes stored into BUFFER, excluding
 129    the terminating null byte.  Output is always null-terminated.
 130    String arguments are passed as C strings.
 131    Integers are passed as C integers.  */
 132
 133 ptrdiff_t
 134 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
 135         const char *format_end, va_list ap)
 136 {
 137   const char *fmt = format;     /* Pointer into format string.  */
 138   char *bufptr = buffer;        /* Pointer into output buffer.  */
 139
 140   /* Use this for sprintf unless we need something really big.  */
 141   char tembuf[DBL_MAX_10_EXP + 100];
 142
 143   /* Size of sprintf_buffer.  */
 144   ptrdiff_t size_allocated = sizeof (tembuf);
 145
 146   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 147   char *sprintf_buffer = tembuf;
 148
 149   /* Buffer we have got with malloc.  */
 150   char *big_buffer = NULL;
 151
 152   ptrdiff_t tem = -1;
 153   char *string;
 154   char fixed_buffer[20];        /* Default buffer for small formatting. */
 155   char *fmtcpy;
 156   int minlen;
 157   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 158   USE_SAFE_ALLOCA;
 159
 160   if (format_end == 0)
 161     format_end = format + strlen (format);
 162
 163   fmtcpy = (format_end - format < sizeof (fixed_buffer) - 1
 164             ? fixed_buffer
 165             : SAFE_ALLOCA (format_end - format + 1));
 166
 167   bufsize--;
 168
 169   /* Loop until end of format string or buffer full. */
 170   while (fmt < format_end && bufsize > 0)
 171     {
 172       if (*fmt == '%')  /* Check for a '%' character */
 173         {
 174           ptrdiff_t size_bound = 0;
 175           ptrdiff_t width;  /* Columns occupied by STRING on display.  */
 176           enum {
 177             pDlen = sizeof pD - 1,
 178             pIlen = sizeof pI - 1,
 179             pMlen = sizeof pMd - 2
 180           };
 181           enum {
 182             no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
 183           } length_modifier = no_modifier;
 184           static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
 185           int maxmlen = max (max (1, pDlen), max (pIlen, pMlen));
 186           int mlen;
 187
 188           fmt++;
 189           /* Copy this one %-spec into fmtcpy.  */
 190           string = fmtcpy;
 191           *string++ = '%';
 192           while (fmt < format_end)
 193             {
 194               *string++ = *fmt;
 195               if ('0' <= *fmt && *fmt <= '9')
 196                 {
 197                   /* Get an idea of how much space we might need.
 198                      This might be a field width or a precision; e.g.
 199                      %1.1000f and %1000.1f both might need 1000+ bytes.
 200                      Parse the width or precision, checking for overflow.  */
 201                   ptrdiff_t n = *fmt - '0';
 202                   while (fmt + 1 < format_end
 203                          && '0' <= fmt[1] && fmt[1] <= '9')
 204                     {
 205                       /* Avoid ptrdiff_t, size_t, and int overflow, as
 206                          many sprintfs mishandle widths greater than INT_MAX.
 207                          This test is simple but slightly conservative: e.g.,
 208                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 209                          even when it's not.  */
 210                       if (n >= min (INT_MAX, min (PTRDIFF_MAX, SIZE_MAX)) / 10)
 211                         error ("Format width or precision too large");
 212                       n = n * 10 + fmt[1] - '0';
 213                       *string++ = *++fmt;
 214                     }
 215
 216                   if (size_bound < n)
 217                     size_bound = n;
 218                 }
 219               else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
 220                           || *fmt == '+'))
 221                 break;
 222               fmt++;
 223             }
 224
 225           /* Check for the length modifiers in textual length order, so
 226              that longer modifiers override shorter ones.  */
 227           for (mlen = 1; mlen <= maxmlen; mlen++)
 228             {
 229               if (format_end - fmt < mlen)
 230                 break;
 231               if (mlen == 1 && *fmt == 'l')
 232                 length_modifier = long_modifier;
 233               if (mlen == pDlen && memcmp (fmt, pD, pDlen) == 0)
 234                 length_modifier = pD_modifier;
 235               if (mlen == pIlen && memcmp (fmt, pI, pIlen) == 0)
 236                 length_modifier = pI_modifier;
 237               if (mlen == pMlen && memcmp (fmt, pMd, pMlen) == 0)
 238                 length_modifier = pM_modifier;
 239             }
 240
 241           mlen = modifier_len[length_modifier];
 242           memcpy (string, fmt + 1, mlen);
 243           string += mlen;
 244           fmt += mlen;
 245           *string = 0;
 246
 247           /* Make the size bound large enough to handle floating point formats
 248              with large numbers.  */
 249           if (size_bound > min (PTRDIFF_MAX, SIZE_MAX) - DBL_MAX_10_EXP - 50)
 250             error ("Format width or precision too large");
 251           size_bound += DBL_MAX_10_EXP + 50;
 252
 253           /* Make sure we have that much.  */
 254           if (size_bound > size_allocated)
 255             {
 256               if (big_buffer)
 257                 xfree (big_buffer);
 258               big_buffer = xmalloc (size_bound);
 259               sprintf_buffer = big_buffer;
 260               size_allocated = size_bound;
 261             }
 262           minlen = 0;
 263           switch (*fmt++)
 264             {
 265             default:
 266               error ("Invalid format operation %s", fmtcpy);
 267
 268 /*          case 'b': */
 269             case 'l':
 270             case 'd':
 271               switch (length_modifier)
 272                 {
 273                 case no_modifier:
 274                   {
 275                     int v = va_arg (ap, int);
 276                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 277                   }
 278                   break;
 279                 case long_modifier:
 280                   {
 281                     long v = va_arg (ap, long);
 282                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 283                   }
 284                   break;
 285                 case pD_modifier:
 286                 signed_pD_modifier:
 287                   {
 288                     ptrdiff_t v = va_arg (ap, ptrdiff_t);
 289                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 290                   }
 291                   break;
 292                 case pI_modifier:
 293                   {
 294                     EMACS_INT v = va_arg (ap, EMACS_INT);
 295                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 296                   }
 297                   break;
 298                 case pM_modifier:
 299                   {
 300                     intmax_t v = va_arg (ap, intmax_t);
 301                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 302                   }
 303                   break;
 304                 }
 305               /* Now copy into final output, truncating as necessary.  */
 306               string = sprintf_buffer;
 307               goto doit;
 308
 309             case 'o':
 310             case 'x':
 311               switch (length_modifier)
 312                 {
 313                 case no_modifier:
 314                   {
 315                     unsigned v = va_arg (ap, unsigned);
 316                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 317                   }
 318                   break;
 319                 case long_modifier:
 320                   {
 321                     unsigned long v = va_arg (ap, unsigned long);
 322                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 323                   }
 324                   break;
 325                 case pD_modifier:
 326                   goto signed_pD_modifier;
 327                 case pI_modifier:
 328                   {
 329                     EMACS_UINT v = va_arg (ap, EMACS_UINT);
 330                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 331                   }
 332                   break;
 333                 case pM_modifier:
 334                   {
 335                     uintmax_t v = va_arg (ap, uintmax_t);
 336                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 337                   }
 338                   break;
 339                 }
 340               /* Now copy into final output, truncating as necessary.  */
 341               string = sprintf_buffer;
 342               goto doit;
 343
 344             case 'f':
 345             case 'e':
 346             case 'g':
 347               {
 348                 double d = va_arg (ap, double);
 349                 tem = sprintf (sprintf_buffer, fmtcpy, d);
 350                 /* Now copy into final output, truncating as necessary.  */
 351                 string = sprintf_buffer;
 352                 goto doit;
 353               }
 354
 355             case 'S':
 356               string[-1] = 's';
 357             case 's':
 358               if (fmtcpy[1] != 's')
 359                 minlen = atoi (&fmtcpy[1]);
 360               string = va_arg (ap, char *);
 361               tem = strlen (string);
 362               if (STRING_BYTES_BOUND < tem)
 363                 error ("String for %%s or %%S format is too long");
 364               width = strwidth (string, tem);
 365               goto doit1;
 366
 367               /* Copy string into final output, truncating if no room.  */
 368             doit:
 369               eassert (0 <= tem);
 370               /* Coming here means STRING contains ASCII only.  */
 371               if (STRING_BYTES_BOUND < tem)
 372                 error ("Format width or precision too large");
 373               width = tem;
 374             doit1:
 375               /* We have already calculated:
 376                  TEM -- length of STRING,
 377                  WIDTH -- columns occupied by STRING when displayed, and
 378                  MINLEN -- minimum columns of the output.  */
 379               if (minlen > 0)
 380                 {
 381                   while (minlen > width && bufsize > 0)
 382                     {
 383                       *bufptr++ = ' ';
 384                       bufsize--;
 385                       minlen--;
 386                     }
 387                   minlen = 0;
 388                 }
 389               if (tem > bufsize)
 390                 {
 391                   /* Truncate the string at character boundary.  */
 392                   tem = bufsize;
 393                   do
 394                     {
 395                       tem--;
 396                       if (CHAR_HEAD_P (string[tem]))
 397                         {
 398                           if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
 399                             tem = bufsize;
 400                           break;
 401                         }
 402                     }
 403                   while (tem != 0);
 404
 405                   memcpy (bufptr, string, tem);
 406                   bufptr[tem] = 0;
 407                   /* Trigger exit from the loop, but make sure we
 408                      return to the caller a value which will indicate
 409                      that the buffer was too small.  */
 410                   bufptr += bufsize;
 411                   bufsize = 0;
 412                   continue;
 413                 }
 414               memcpy (bufptr, string, tem);
 415               bufptr += tem;
 416               bufsize -= tem;
 417               if (minlen < 0)
 418                 {
 419                   while (minlen < - width && bufsize > 0)
 420                     {
 421                       *bufptr++ = ' ';
 422                       bufsize--;
 423                       minlen++;
 424                     }
 425                   minlen = 0;
 426                 }
 427               continue;
 428
 429             case 'c':
 430               {
 431                 int chr = va_arg (ap, int);
 432                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 433                 string = charbuf;
 434                 string[tem] = 0;
 435                 width = strwidth (string, tem);
 436                 if (fmtcpy[1] != 'c')
 437                   minlen = atoi (&fmtcpy[1]);
 438                 goto doit1;
 439               }
 440
 441             case '%':
 442               fmt--;    /* Drop thru and this % will be treated as normal */
 443             }
 444         }
 445
 446       {
 447         /* Just some character; Copy it if the whole multi-byte form
 448            fit in the buffer.  */
 449         char *save_bufptr = bufptr;
 450
 451         do { *bufptr++ = *fmt++; }
 452         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 453         if (!CHAR_HEAD_P (*fmt))
 454           {
 455             /* Truncate, but return value that will signal to caller
 456                that the buffer was too small.  */
 457             *save_bufptr = 0;
 458             break;
 459           }
 460       }
 461     };
 462
 463   /* If we had to malloc something, free it.  */
 464   xfree (big_buffer);
 465
 466   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 467
 468   SAFE_FREE ();
 469   return bufptr - buffer;
 470 }
 471
 472 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
 473    is not limited to returning an 'int' so it doesn't have a silly 2
 474    GiB limit on typical 64-bit hosts.  However, it is limited to the
 475    Emacs-style formats that doprnt supports.
 476
 477    Return the number of bytes put into BUF, excluding the terminating
 478    '\0'.  */
 479 ptrdiff_t
 480 esprintf (char *buf, char const *format, ...)
 481 {
 482   ptrdiff_t nbytes;
 483   va_list ap;
 484   va_start (ap, format);
 485   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
 486   va_end (ap);
 487   return nbytes;
 488 }
 489
 490 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
 491
 492 /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
 493    and updating *BUFSIZE if the buffer is too small, and otherwise
 494    behaving line esprintf.  When reallocating, free *BUF unless it is
 495    equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
 496    memory exhaustion instead of growing the buffer size past
 497    BUFSIZE_MAX.  */
 498 ptrdiff_t
 499 exprintf (char **buf, ptrdiff_t *bufsize,
 500           char const *nonheapbuf, ptrdiff_t bufsize_max,
 501           char const *format, ...)
 502 {
 503   ptrdiff_t nbytes;
 504   va_list ap;
 505   va_start (ap, format);
 506   nbytes = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, ap);
 507   va_end (ap);
 508   return nbytes;
 509 }
 510
 511 #endif
 512
 513 /* Act like exprintf, except take a va_list.  */
 514 ptrdiff_t
 515 evxprintf (char **buf, ptrdiff_t *bufsize,
 516            char const *nonheapbuf, ptrdiff_t bufsize_max,
 517            char const *format, va_list ap)
 518 {
 519   for (;;)
 520     {
 521       ptrdiff_t nbytes;
 522       va_list ap_copy;
 523       va_copy (ap_copy, ap);
 524       nbytes = doprnt (*buf, *bufsize, format, 0, ap_copy);
 525       va_end (ap_copy);
 526       if (nbytes < *bufsize - 1)
 527         return nbytes;
 528       if (*buf != nonheapbuf)
 529         xfree (*buf);
 530       *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
 531     }
 532 }