src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2011  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
  46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or ll.  Also, %% in a format stands for a single % in the
  74    output.  A % that does not introduce a valid %-sequence causes
  75    undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, ll (two letter ells) means to use `long long int' or
  89    `unsigned long long int'; this can be used only on hosts that have
  90    these two types.  The empty length modifier means to use `int' or
  91    `unsigned int'.  EMACS_INT arguments should use the pI macro, which
  92    expands to whatever length modifier is needed for the target host.
  93
  94    The width specifier supplies a lower limit for the length of the printed
  95    representation.  The padding, if any, normally goes on the left, but it goes
  96    on the right if the - flag is present.  The padding character is normally a
  97    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  98    The - flag takes precedence over the 0 flag.
  99
 100    For %e, %f, and %g sequences, the number after the "." in the precision
 101    specifier says how many decimal places to show; if zero, the decimal point
 102    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 103
 104 #include <config.h>
 105 #include <stdio.h>
 106 #include <ctype.h>
 107 #include <setjmp.h>
 108
 109 #ifdef STDC_HEADERS
 110 #include <float.h>
 111 #endif
 112
 113 #include <unistd.h>
 114
 115 #include <limits.h>
 116
 117 #include "lisp.h"
 118
 119 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 120    don't have to include others because CHAR_HEAD_P does not contain
 121    another macro.  */
 122 #include "character.h"
 123
 124 #ifndef SIZE_MAX
 125 # define SIZE_MAX ((size_t) -1)
 126 #endif
 127
 128 #ifndef DBL_MAX_10_EXP
 129 #define DBL_MAX_10_EXP 308 /* IEEE double */
 130 #endif
 131
 132 /* Generate output from a format-spec FORMAT,
 133    terminated at position FORMAT_END.
 134    (*FORMAT_END is not part of the format, but must exist and be readable.)
 135    Output goes in BUFFER, which has room for BUFSIZE chars.
 136    BUFSIZE must be positive.  If the output does not fit, truncate it
 137    to fit and return BUFSIZE - 1; if this truncates a multibyte
 138    sequence, store '\0' into the sequence's first byte.
 139    Returns the number of bytes stored into BUFFER, excluding
 140    the terminating null byte.  Output is always null-terminated.
 141    String arguments are passed as C strings.
 142    Integers are passed as C integers.  */
 143
 144 size_t
 145 doprnt (char *buffer, register size_t bufsize, const char *format,
 146         const char *format_end, va_list ap)
 147 {
 148   const char *fmt = format;     /* Pointer into format string */
 149   register char *bufptr = buffer; /* Pointer into output buffer.. */
 150
 151   /* Use this for sprintf unless we need something really big.  */
 152   char tembuf[DBL_MAX_10_EXP + 100];
 153
 154   /* Size of sprintf_buffer.  */
 155   size_t size_allocated = sizeof (tembuf);
 156
 157   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 158   char *sprintf_buffer = tembuf;
 159
 160   /* Buffer we have got with malloc.  */
 161   char *big_buffer = NULL;
 162
 163   register size_t tem;
 164   char *string;
 165   char fixed_buffer[20];        /* Default buffer for small formatting. */
 166   char *fmtcpy;
 167   int minlen;
 168   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 169   USE_SAFE_ALLOCA;
 170
 171   if (format_end == 0)
 172     format_end = format + strlen (format);
 173
 174   if ((format_end - format + 1) < sizeof (fixed_buffer))
 175     fmtcpy = fixed_buffer;
 176   else
 177     SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
 178
 179   bufsize--;
 180
 181   /* Loop until end of format string or buffer full. */
 182   while (fmt < format_end && bufsize > 0)
 183     {
 184       if (*fmt == '%')  /* Check for a '%' character */
 185         {
 186           size_t size_bound = 0;
 187           EMACS_INT width;  /* Columns occupied by STRING on display.  */
 188           int long_flag = 0;
 189
 190           fmt++;
 191           /* Copy this one %-spec into fmtcpy.  */
 192           string = fmtcpy;
 193           *string++ = '%';
 194           while (fmt < format_end)
 195             {
 196               *string++ = *fmt;
 197               if ('0' <= *fmt && *fmt <= '9')
 198                 {
 199                   /* Get an idea of how much space we might need.
 200                      This might be a field width or a precision; e.g.
 201                      %1.1000f and %1000.1f both might need 1000+ bytes.
 202                      Parse the width or precision, checking for overflow.  */
 203                   size_t n = *fmt - '0';
 204                   while (fmt < format_end
 205                          && '0' <= fmt[1] && fmt[1] <= '9')
 206                     {
 207                       /* Avoid size_t overflow.  Avoid int overflow too, as
 208                          many sprintfs mishandle widths greater than INT_MAX.
 209                          This test is simple but slightly conservative: e.g.,
 210                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 211                          even when it's not.  */
 212                       if (n >= min (INT_MAX, SIZE_MAX) / 10)
 213                         error ("Format width or precision too large");
 214                       n = n * 10 + fmt[1] - '0';
 215                       *string++ = *++fmt;
 216                     }
 217
 218                   if (size_bound < n)
 219                     size_bound = n;
 220                 }
 221               else if (*fmt == '-' || *fmt == ' ' || *fmt == '.' || *fmt == '+')
 222                 ;
 223               else if (*fmt == 'l')
 224                 {
 225                   long_flag = 1 + (fmt + 1 < format_end && fmt[1] == 'l');
 226                   fmt += long_flag;
 227                   break;
 228                 }
 229               else
 230                 break;
 231               fmt++;
 232             }
 233           if (fmt > format_end)
 234             fmt = format_end;
 235           *string = 0;
 236
 237           /* Make the size bound large enough to handle floating point formats
 238              with large numbers.  */
 239           if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
 240             error ("Format width or precision too large");
 241           size_bound += DBL_MAX_10_EXP + 50;
 242
 243           /* Make sure we have that much.  */
 244           if (size_bound > size_allocated)
 245             {
 246               if (big_buffer)
 247                 xfree (big_buffer);
 248               big_buffer = (char *) xmalloc (size_bound);
 249               sprintf_buffer = big_buffer;
 250               size_allocated = size_bound;
 251             }
 252           minlen = 0;
 253           switch (*fmt++)
 254             {
 255             default:
 256               error ("Invalid format operation %%%s%c",
 257                      "ll" + 2 - long_flag, fmt[-1]);
 258
 259 /*          case 'b': */
 260             case 'l':
 261             case 'd':
 262               {
 263                 int i;
 264                 long l;
 265
 266                 if (1 < long_flag)
 267                   {
 268 #ifdef HAVE_LONG_LONG_INT
 269                     long long ll = va_arg (ap, long long);
 270                     sprintf (sprintf_buffer, fmtcpy, ll);
 271 #else
 272                     error ("Invalid format operation %%ll%c", fmt[-1]);
 273 #endif
 274                   }
 275                 else if (long_flag)
 276                   {
 277                     l = va_arg(ap, long);
 278                     sprintf (sprintf_buffer, fmtcpy, l);
 279                   }
 280                 else
 281                   {
 282                     i = va_arg(ap, int);
 283                     sprintf (sprintf_buffer, fmtcpy, i);
 284                   }
 285                 /* Now copy into final output, truncating as necessary.  */
 286                 string = sprintf_buffer;
 287                 goto doit;
 288               }
 289
 290             case 'o':
 291             case 'x':
 292               {
 293                 unsigned u;
 294                 unsigned long ul;
 295
 296                 if (1 < long_flag)
 297                   {
 298 #ifdef HAVE_UNSIGNED_LONG_LONG_INT
 299                     unsigned long long ull = va_arg (ap, unsigned long long);
 300                     sprintf (sprintf_buffer, fmtcpy, ull);
 301 #else
 302                     error ("Invalid format operation %%ll%c", fmt[-1]);
 303 #endif
 304                   }
 305                 else if (long_flag)
 306                   {
 307                     ul = va_arg(ap, unsigned long);
 308                     sprintf (sprintf_buffer, fmtcpy, ul);
 309                   }
 310                 else
 311                   {
 312                     u = va_arg(ap, unsigned);
 313                     sprintf (sprintf_buffer, fmtcpy, u);
 314                   }
 315                 /* Now copy into final output, truncating as necessary.  */
 316                 string = sprintf_buffer;
 317                 goto doit;
 318               }
 319
 320             case 'f':
 321             case 'e':
 322             case 'g':
 323               {
 324                 double d = va_arg(ap, double);
 325                 sprintf (sprintf_buffer, fmtcpy, d);
 326                 /* Now copy into final output, truncating as necessary.  */
 327                 string = sprintf_buffer;
 328                 goto doit;
 329               }
 330
 331             case 'S':
 332               string[-1] = 's';
 333             case 's':
 334               if (fmtcpy[1] != 's')
 335                 minlen = atoi (&fmtcpy[1]);
 336               string = va_arg (ap, char *);
 337               tem = strlen (string);
 338               if (tem > MOST_POSITIVE_FIXNUM)
 339                 error ("String for %%s or %%S format is too long");
 340               width = strwidth (string, tem);
 341               goto doit1;
 342
 343               /* Copy string into final output, truncating if no room.  */
 344             doit:
 345               /* Coming here means STRING contains ASCII only.  */
 346               tem = strlen (string);
 347               if (tem > MOST_POSITIVE_FIXNUM)
 348                 error ("Format width or precision too large");
 349               width = tem;
 350             doit1:
 351               /* We have already calculated:
 352                  TEM -- length of STRING,
 353                  WIDTH -- columns occupied by STRING when displayed, and
 354                  MINLEN -- minimum columns of the output.  */
 355               if (minlen > 0)
 356                 {
 357                   while (minlen > width && bufsize > 0)
 358                     {
 359                       *bufptr++ = ' ';
 360                       bufsize--;
 361                       minlen--;
 362                     }
 363                   minlen = 0;
 364                 }
 365               if (tem > bufsize)
 366                 {
 367                   /* Truncate the string at character boundary.  */
 368                   tem = bufsize;
 369                   while (!CHAR_HEAD_P (string[tem - 1])) tem--;
 370                   /* If the multibyte sequence of this character is
 371                      too long for the space we have left in the
 372                      buffer, truncate before it.  */
 373                   if (tem > 0
 374                       && BYTES_BY_CHAR_HEAD (string[tem - 1]) > bufsize)
 375                     tem--;
 376                   if (tem > 0)
 377                     memcpy (bufptr, string, tem);
 378                   bufptr[tem] = 0;
 379                   /* Trigger exit from the loop, but make sure we
 380                      return to the caller a value which will indicate
 381                      that the buffer was too small.  */
 382                   bufptr += bufsize;
 383                   bufsize = 0;
 384                   continue;
 385                 }
 386               else
 387                 memcpy (bufptr, string, tem);
 388               bufptr += tem;
 389               bufsize -= tem;
 390               if (minlen < 0)
 391                 {
 392                   while (minlen < - width && bufsize > 0)
 393                     {
 394                       *bufptr++ = ' ';
 395                       bufsize--;
 396                       minlen++;
 397                     }
 398                   minlen = 0;
 399                 }
 400               continue;
 401
 402             case 'c':
 403               {
 404                 int chr = va_arg(ap, int);
 405                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 406                 string = charbuf;
 407                 string[tem] = 0;
 408                 width = strwidth (string, tem);
 409                 if (fmtcpy[1] != 'c')
 410                   minlen = atoi (&fmtcpy[1]);
 411                 goto doit1;
 412               }
 413
 414             case '%':
 415               fmt--;    /* Drop thru and this % will be treated as normal */
 416             }
 417         }
 418
 419       {
 420         /* Just some character; Copy it if the whole multi-byte form
 421            fit in the buffer.  */
 422         char *save_bufptr = bufptr;
 423
 424         do { *bufptr++ = *fmt++; }
 425         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 426         if (!CHAR_HEAD_P (*fmt))
 427           {
 428             /* Truncate, but return value that will signal to caller
 429                that the buffer was too small.  */
 430             *save_bufptr = 0;
 431             break;
 432           }
 433       }
 434     };
 435
 436   /* If we had to malloc something, free it.  */
 437   xfree (big_buffer);
 438
 439   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 440
 441   SAFE_FREE ();
 442   return bufptr - buffer;
 443 }