src/data/data-out.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2006, 2009, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "data/data-out.h"
  20
  21 #include <ctype.h>
  22 #include <float.h>
  23 #include <math.h>
  24 #include <stdint.h>
  25 #include <stdlib.h>
  26 #include <time.h>
  27 #include <unistr.h>
  28
  29 #include "data/calendar.h"
  30 #include "data/format.h"
  31 #include "data/settings.h"
  32 #include "data/value.h"
  33 #include "libpspp/assertion.h"
  34 #include "libpspp/cast.h"
  35 #include "libpspp/float-format.h"
  36 #include "libpspp/i18n.h"
  37 #include "libpspp/integer-format.h"
  38 #include "libpspp/message.h"
  39 #include "libpspp/misc.h"
  40 #include "libpspp/pool.h"
  41 #include "libpspp/str.h"
  42
  43 #include "gl/minmax.h"
  44 #include "gl/c-snprintf.h"
  45
  46 #include "gettext.h"
  47 #define _(msgid) gettext (msgid)
  48 \f
/* A representation of a number that can be quickly rounded to
   any desired number of decimal places (up to a specified
   maximum).

   rounder_init() fills in every member: STRING receives the
   formatted magnitude (with a single leading `0' stripped, if
   present) and the remaining members are measurements of that
   string. */
struct rounder
  {
    char string[64];    /* Magnitude of number with excess precision. */
    int integer_digits; /* Number of digits before decimal point. */
    int leading_nines;  /* Number of `9's or `.'s at start of string. */
    int leading_zeros;  /* Number of `0's or `.'s at start of string. */
    bool negative;      /* Is the number negative? */
  };
  60
  61 static void rounder_init (struct rounder *, double number, int max_decimals);
  62 static int rounder_width (const struct rounder *, int decimals,
  63                           int *integer_digits, bool *negative);
  64 static void rounder_format (const struct rounder *, int decimals,
  65                             char *output);
  66 \f
  67 typedef void data_out_converter_func (const union value *,
  68                                       const struct fmt_spec *,
  69                                       char *);
  70 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
  71         static data_out_converter_func output_##METHOD;
  72 #include "format.def"
  73
  74 static bool output_decimal (const struct rounder *, const struct fmt_spec *,
  75                             bool require_affixes, char *);
  76 static bool output_scientific (double, const struct fmt_spec *,
  77                                bool require_affixes, char *);
  78
  79 static double power10 (int) PURE_FUNCTION;
  80 static double power256 (int) PURE_FUNCTION;
  81
  82 static void output_infinite (double, const struct fmt_spec *, char *);
  83 static void output_missing (const struct fmt_spec *, char *);
  84 static void output_overflow (const struct fmt_spec *, char *);
  85 static bool output_bcd_integer (double, int digits, char *);
  86 static void output_binary_integer (uint64_t, int bytes, enum integer_format,
  87                                    char *);
  88 static void output_hex (const void *, size_t bytes, char *);
  89 \f
  90
/* Table of conversion functions with one entry per format type.  Each FMT
   entry in format.def expands to the name of its output_<METHOD> function,
   so the table stays in step with the list of formats.  Callers in this
   file index it with FORMAT->type. */
static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] =
    {
#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) output_##METHOD,
#include "format.def"
    };
  96
  97 /* Converts the INPUT value, encoded in INPUT_ENCODING, according to format
  98    specification FORMAT, appending the output to OUTPUT in OUTPUT_ENCODING.
  99    However, binary formats (FMT_P, FMT_PK, FMT_IB, FMT_PIB, FMT_RB) yield the
 100    binary results, which may not be properly encoded for OUTPUT_ENCODING.
 101
 102    VALUE must be the correct width for FORMAT, that is, its width must be
 103    fmt_var_width(FORMAT).
 104
 105    INPUT_ENCODING can normally be obtained by calling dict_get_encoding() on
 106    the dictionary with which INPUT is associated.  ENCODING is only important
 107    when FORMAT's type is FMT_A. */
 108 void
 109 data_out_recode (const union value *input, const char *input_encoding,
 110                  const struct fmt_spec *format,
 111                  struct string *output, const char *output_encoding)
 112 {
 113   assert (fmt_check_output (format));
 114   if (format->type == FMT_A)
 115     {
 116       char *in = CHAR_CAST (char *, value_str (input, format->w));
 117       char *out = recode_string (output_encoding, input_encoding,
 118                                  in, format->w);
 119       ds_put_cstr (output, out);
 120       free (out);
 121     }
 122   else if (fmt_get_category (format->type) == FMT_CAT_BINARY)
 123     converters[format->type] (input, format,
 124                               ds_put_uninit (output, format->w));
 125   else
 126     {
 127       char *utf8_encoded = data_out (input, input_encoding, format);
 128       char *output_encoded = recode_string (output_encoding, UTF8,
 129                                             utf8_encoded, -1);
 130       ds_put_cstr (output, output_encoded);
 131       free (output_encoded);
 132       free (utf8_encoded);
 133     }
 134 }
 135
 136 static char *
 137 binary_to_utf8 (const char *in, struct pool *pool)
 138 {
 139   uint8_t *out = pool_alloc_unaligned (pool, strlen (in) * 2 + 1);
 140   uint8_t *p = out;
 141
 142   while (*in != '\0')
 143     {
 144       uint8_t byte = *in++;
 145       int mblen = u8_uctomb (p, byte, 2);
 146       assert (mblen > 0);
 147       p += mblen;
 148     }
 149   *p = '\0';
 150
 151   return CHAR_CAST (char *, out);
 152 }
 153
 154 /* Converts the INPUT value into a UTF-8 encoded string, according to format
 155    specification FORMAT.
 156
 157    VALUE must be the correct width for FORMAT, that is, its width must be
 158    fmt_var_width(FORMAT).
 159
 160    ENCODING must be the encoding of INPUT.  Normally this can be obtained by
 161    calling dict_get_encoding() on the dictionary with which INPUT is
 162    associated.  ENCODING is only important when FORMAT's type is FMT_A.
 163
 164    The return value is dynamically allocated, and must be freed by the caller.
 165    If POOL is non-null, then the return value is allocated on that pool.  */
 166 char *
 167 data_out_pool (const union value *input, const char *encoding,
 168                const struct fmt_spec *format, struct pool *pool)
 169 {
 170   assert (fmt_check_output (format));
 171   if (format->type == FMT_A)
 172     {
 173       char *in = CHAR_CAST (char *, value_str (input, format->w));
 174       return recode_string_pool (UTF8, encoding, in, format->w, pool);
 175     }
 176   else if (fmt_get_category (format->type) == FMT_CAT_BINARY)
 177     {
 178       char tmp[16];
 179
 180       assert (format->w + 1 <= sizeof tmp);
 181       converters[format->type] (input, format, tmp);
 182       return binary_to_utf8 (tmp, pool);
 183     }
 184   else
 185     {
 186       const struct fmt_number_style *style = settings_get_style (format->type);
 187       size_t size = format->w + style->extra_bytes + 1;
 188       char *output;
 189
 190       output = pool_alloc_unaligned (pool, size);
 191       converters[format->type] (input, format, output);
 192       return output;
 193     }
 194 }
 195
 196 /* Like data_out_pool(), except that for basic numeric formats (F, COMMA, DOT,
 197    COLLAR, PCT, E) and custom currency formats are formatted as wide as
 198    necessary to fully display the selected number of decimal places. */
 199 char *
 200 data_out_stretchy (const union value *input, const char *encoding,
 201                    const struct fmt_spec *format, struct pool *pool)
 202 {
 203
 204   if (fmt_get_category (format->type) & (FMT_CAT_BASIC | FMT_CAT_CUSTOM))
 205     {
 206       const struct fmt_number_style *style = settings_get_style (format->type);
 207       struct fmt_spec wide_format;
 208       char tmp[128];
 209       size_t size;
 210
 211       wide_format.type = format->type;
 212       wide_format.w = 40;
 213       wide_format.d = format->d;
 214
 215       size = format->w + style->extra_bytes + 1;
 216       if (size <= sizeof tmp)
 217         {
 218           output_number (input, &wide_format, tmp);
 219           return pool_strdup (pool, tmp + strspn (tmp, " "));
 220         }
 221     }
 222
 223   return data_out_pool (input, encoding, format, pool);
 224 }
 225
 226 char *
 227 data_out (const union value *input, const char *encoding, const struct fmt_spec *format)
 228 {
 229   return data_out_pool (input, encoding, format, NULL);
 230 }
 231
 232 \f
 233 /* Main conversion functions. */
 234
 235 /* Outputs F, COMMA, DOT, DOLLAR, PCT, E, CCA, CCB, CCC, CCD, and
 236    CCE formats. */
 237 static void
 238 output_number (const union value *input, const struct fmt_spec *format,
 239                char *output)
 240 {
 241   double number = input->f;
 242
 243   if (number == SYSMIS)
 244     output_missing (format, output);
 245   else if (!isfinite (number))
 246     output_infinite (number, format, output);
 247   else
 248     {
 249       if (format->type != FMT_E && fabs (number) < 1.5 * power10 (format->w))
 250         {
 251           struct rounder r;
 252           rounder_init (&r, number, format->d);
 253
 254           if (output_decimal (&r, format, true, output)
 255               || output_scientific (number, format, true, output)
 256               || output_decimal (&r, format, false, output))
 257             return;
 258         }
 259
 260       if (!output_scientific (number, format, false, output))
 261         output_overflow (format, output);
 262     }
 263 }
 264
 265 /* Outputs N format. */
 266 static void
 267 output_N (const union value *input, const struct fmt_spec *format,
 268           char *output)
 269 {
 270   double number = input->f * power10 (format->d);
 271   if (input->f == SYSMIS || number < 0)
 272     output_missing (format, output);
 273   else
 274     {
 275       char buf[128];
 276       number = fabs (round (number));
 277       if (number < power10 (format->w)
 278           && c_snprintf (buf, 128, "%0*.0f", format->w, number) == format->w)
 279         memcpy (output, buf, format->w);
 280       else
 281         output_overflow (format, output);
 282     }
 283
 284   output[format->w] = '\0';
 285 }
 286
 287 /* Outputs Z format. */
 288 static void
 289 output_Z (const union value *input, const struct fmt_spec *format,
 290           char *output)
 291 {
 292   double number = input->f * power10 (format->d);
 293   char buf[128];
 294   if (input->f == SYSMIS)
 295     output_missing (format, output);
 296   else if (fabs (number) < power10 (format->w)
 297            && c_snprintf (buf, 128, "%0*.0f", format->w,
 298                        fabs (round (number))) == format->w)
 299     {
 300       if (number < 0 && strspn (buf, "0") < format->w)
 301         {
 302           char *p = &buf[format->w - 1];
 303           *p = "}JKLMNOPQR"[*p - '0'];
 304         }
 305       memcpy (output, buf, format->w);
 306       output[format->w] = '\0';
 307     }
 308   else
 309     output_overflow (format, output);
 310 }
 311
 312 /* Outputs P format. */
 313 static void
 314 output_P (const union value *input, const struct fmt_spec *format,
 315           char *output)
 316 {
 317   if (output_bcd_integer (fabs (input->f * power10 (format->d)),
 318                           format->w * 2 - 1, output)
 319       && input->f < 0.0)
 320     output[format->w - 1] |= 0xd;
 321   else
 322     output[format->w - 1] |= 0xf;
 323 }
 324
 325 /* Outputs PK format. */
 326 static void
 327 output_PK (const union value *input, const struct fmt_spec *format,
 328            char *output)
 329 {
 330   output_bcd_integer (input->f * power10 (format->d), format->w * 2, output);
 331 }
 332
 333 /* Outputs IB format. */
 334 static void
 335 output_IB (const union value *input, const struct fmt_spec *format,
 336            char *output)
 337 {
 338   double number = round (input->f * power10 (format->d));
 339   if (input->f == SYSMIS
 340       || number >= power256 (format->w) / 2 - 1
 341       || number < -power256 (format->w) / 2)
 342     memset (output, 0, format->w);
 343   else
 344     {
 345       uint64_t integer = fabs (number);
 346       if (number < 0)
 347         integer = -integer;
 348       output_binary_integer (integer, format->w,
 349                              settings_get_output_integer_format (),
 350                              output);
 351     }
 352
 353   output[format->w] = '\0';
 354 }
 355
 356 /* Outputs PIB format. */
 357 static void
 358 output_PIB (const union value *input, const struct fmt_spec *format,
 359             char *output)
 360 {
 361   double number = round (input->f * power10 (format->d));
 362   if (input->f == SYSMIS
 363       || number < 0 || number >= power256 (format->w))
 364     memset (output, 0, format->w);
 365   else
 366     output_binary_integer (number, format->w,
 367                            settings_get_output_integer_format (), output);
 368
 369   output[format->w] = '\0';
 370 }
 371
 372 /* Outputs PIBHEX format. */
 373 static void
 374 output_PIBHEX (const union value *input, const struct fmt_spec *format,
 375                char *output)
 376 {
 377   double number = round (input->f);
 378   if (input->f == SYSMIS)
 379     output_missing (format, output);
 380   else if (input->f < 0 || number >= power256 (format->w / 2))
 381     output_overflow (format, output);
 382   else
 383     {
 384       char tmp[8];
 385       output_binary_integer (number, format->w / 2, INTEGER_MSB_FIRST, tmp);
 386       output_hex (tmp, format->w / 2, output);
 387     }
 388
 389 }
 390
 391 /* Outputs RB format. */
 392 static void
 393 output_RB (const union value *input, const struct fmt_spec *format,
 394            char *output)
 395 {
 396   double d = input->f;
 397   memcpy (output, &d, format->w);
 398
 399   output[format->w] = '\0';
 400 }
 401
 402 /* Outputs RBHEX format. */
 403 static void
 404 output_RBHEX (const union value *input, const struct fmt_spec *format,
 405               char *output)
 406 {
 407   double d = input->f;
 408
 409   output_hex (&d, format->w / 2, output);
 410 }
 411
 412 /* Outputs DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, WKYR,
 413    DATETIME, TIME, and DTIME formats. */
 414 static void
 415 output_date (const union value *input, const struct fmt_spec *format,
 416              char *output)
 417 {
 418   double number = input->f;
 419   int year, month, day, yday;
 420
 421   const char *template = fmt_date_template (format->type, format->w);
 422
 423   char tmp[64];
 424   char *p = tmp;
 425
 426   if (number == SYSMIS)
 427     goto missing;
 428
 429   if (fmt_get_category (format->type) == FMT_CAT_DATE)
 430     {
 431       if (number <= 0)
 432         goto missing;
 433       calendar_offset_to_gregorian (number / 60. / 60. / 24.,
 434                                     &year, &month, &day, &yday);
 435       number = fmod (number, 60. * 60. * 24.);
 436     }
 437   else
 438     year = month = day = yday = 0;
 439
 440   while (*template != '\0')
 441     {
 442       int excess_width;
 443
 444       int ch = *template;
 445       int count = 1;
 446       while (template[count] == ch)
 447         count++;
 448       template += count;
 449
 450       switch (ch)
 451         {
 452         case 'd':
 453           if (count < 3)
 454             p += sprintf (p, "%02d", day);
 455           else
 456             p += sprintf (p, "%03d", yday);
 457           break;
 458         case 'm':
 459           if (count < 3)
 460             p += sprintf (p, "%02d", month);
 461           else
 462             {
 463               static const char *const months[12] =
 464                 {
 465                   "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
 466                   "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
 467                 };
 468               p = stpcpy (p, months[month - 1]);
 469             }
 470           break;
 471         case 'y':
 472           if (count >= 4)
 473             {
 474               if (year <= 9999)
 475                 p += sprintf (p, "%04d", year);
 476               else if (format->type == FMT_DATETIME)
 477                 p = stpcpy (p, "****");
 478               else
 479                 goto overflow;
 480             }
 481           else
 482             {
 483               int epoch =  settings_get_epoch ();
 484               int offset = year - epoch;
 485               if (offset < 0 || offset > 99)
 486                 goto overflow;
 487               p += sprintf (p, "%02d", abs (year) % 100);
 488             }
 489           break;
 490         case 'q':
 491           p += sprintf (p, "%d", (month - 1) / 3 + 1);
 492           break;
 493         case 'w':
 494           p += sprintf (p, "%2d", (yday - 1) / 7 + 1);
 495           break;
 496         case 'D':
 497           if (number < 0)
 498             *p++ = '-';
 499           number = fabs (number);
 500           p += c_snprintf (p, 64, "%*.0f", count, floor (number / 60. / 60. / 24.));
 501           number = fmod (number, 60. * 60. * 24.);
 502           break;
 503         case 'H':
 504           if (number < 0)
 505             *p++ = '-';
 506           number = fabs (number);
 507           p += c_snprintf (p, 64, "%0*.0f", count, floor (number / 60. / 60.));
 508           number = fmod (number, 60. * 60.);
 509           break;
 510         case 'M':
 511           p += sprintf (p, "%02d", (int) floor (number / 60.));
 512           number = fmod (number, 60.);
 513           excess_width = format->w - (p - tmp);
 514           if (excess_width < 0)
 515             goto overflow;
 516           if (excess_width == 3 || excess_width == 4
 517               || (excess_width >= 5 && format->d == 0))
 518             p += sprintf (p, ":%02d", (int) number);
 519           else if (excess_width >= 5)
 520             {
 521               int d = MIN (format->d, excess_width - 4);
 522               int w = d + 3;
 523               c_snprintf (p, 64, ":%0*.*f", w, d, number);
 524               if (settings_get_decimal_char (FMT_F) != '.')
 525                 {
 526                   char *cp = strchr (p, '.');
 527                   if (cp != NULL)
 528                     *cp = settings_get_decimal_char (FMT_F);
 529                 }
 530               p += strlen (p);
 531             }
 532           goto done;
 533         default:
 534           assert (count == 1);
 535           *p++ = ch;
 536           break;
 537         }
 538     }
 539
 540  done:
 541   buf_copy_lpad (output, format->w, tmp, p - tmp, ' ');
 542   output[format->w] = '\0';
 543   return;
 544
 545  overflow:
 546   output_overflow (format, output);
 547   return;
 548
 549  missing:
 550   output_missing (format, output);
 551   return;
 552 }
 553
 554 /* Outputs WKDAY format. */
 555 static void
 556 output_WKDAY (const union value *input, const struct fmt_spec *format,
 557               char *output)
 558 {
 559   static const char *const weekdays[7] =
 560     {
 561       "SUNDAY", "MONDAY", "TUESDAY", "WEDNESDAY",
 562       "THURSDAY", "FRIDAY", "SATURDAY",
 563     };
 564
 565   if (input->f >= 1 && input->f < 8)
 566     {
 567       buf_copy_str_rpad (output, format->w,
 568                          weekdays[(int) input->f - 1], ' ');
 569       output[format->w] = '\0';
 570     }
 571   else
 572     {
 573       if (input->f != SYSMIS)
 574         msg (ME, _("Weekday number %f is not between 1 and 7."), input->f);
 575       output_missing (format, output);
 576     }
 577
 578 }
 579
 580 /* Outputs MONTH format. */
 581 static void
 582 output_MONTH (const union value *input, const struct fmt_spec *format,
 583               char *output)
 584 {
 585   static const char *const months[12] =
 586     {
 587       "JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE",
 588       "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER",
 589     };
 590
 591   if (input->f >= 1 && input->f < 13)
 592     {
 593       buf_copy_str_rpad (output, format->w, months[(int) input->f - 1], ' ');
 594       output[format->w] = '\0';
 595     }
 596   else
 597     {
 598       if (input->f != SYSMIS)
 599         msg (ME, _("Month number %f is not between 1 and 12."), input->f);
 600       output_missing (format, output);
 601     }
 602
 603 }
 604
 605 /* Outputs A format. */
 606 static void
 607 output_A (const union value *input UNUSED,
 608           const struct fmt_spec *format UNUSED, char *output UNUSED)
 609 {
 610   NOT_REACHED ();
 611 }
 612
 613 /* Outputs AHEX format. */
 614 static void
 615 output_AHEX (const union value *input, const struct fmt_spec *format,
 616              char *output)
 617 {
 618   output_hex (value_str (input, format->w), format->w / 2, output);
 619 }
 620 \f
 621 /* Decimal and scientific formatting. */
 622
 623 /* If REQUEST plus the current *WIDTH fits within MAX_WIDTH,
 624    increments *WIDTH by REQUEST and return true.
 625    Otherwise returns false without changing *WIDTH. */
 626 static bool
 627 allocate_space (int request, int max_width, int *width)
 628 {
 629   assert (*width <= max_width);
 630   if (request + *width <= max_width)
 631     {
 632       *width += request;
 633       return true;
 634     }
 635   else
 636     return false;
 637 }
 638
 639 /* Tries to compose the number represented by R, in the style of
 640    FORMAT, into OUTPUT.  Returns true if successful, false on
 641    failure, which occurs if FORMAT's width is too narrow.  If
 642    REQUIRE_AFFIXES is true, then the prefix and suffix specified
 643    by FORMAT's style must be included; otherwise, they may be
 644    omitted to make the number fit. */
 645 static bool
 646 output_decimal (const struct rounder *r, const struct fmt_spec *format,
 647                 bool require_affixes, char *output)
 648 {
 649   const struct fmt_number_style *style =
 650     settings_get_style (format->type);
 651
 652   int decimals;
 653
 654   for (decimals = format->d; decimals >= 0; decimals--)
 655     {
 656       /* Formatted version of magnitude of NUMBER. */
 657       char magnitude[64];
 658
 659       /* Number of digits in MAGNITUDE's integer and fractional parts. */
 660       int integer_digits;
 661
 662       /* Amount of space within the field width already claimed.
 663          Initially this is the width of MAGNITUDE, then it is reduced
 664          in stages as space is allocated to prefixes and suffixes and
 665          grouping characters. */
 666       int width;
 667
 668       /* Include various decorations? */
 669       bool add_neg_prefix;
 670       bool add_affixes;
 671       bool add_grouping;
 672
 673       /* Position in output. */
 674       char *p;
 675
 676       /* Make sure there's room for the number's magnitude, plus
 677          the negative suffix, plus (if negative) the negative
 678          prefix. */
 679       width = rounder_width (r, decimals, &integer_digits, &add_neg_prefix);
 680       width += style->neg_suffix.width;
 681       if (add_neg_prefix)
 682         width += style->neg_prefix.width;
 683       if (width > format->w)
 684         continue;
 685
 686       /* If there's room for the prefix and suffix, allocate
 687          space.  If the affixes are required, but there's no
 688          space, give up. */
 689       add_affixes = allocate_space (fmt_affix_width (style),
 690                                     format->w, &width);
 691       if (!add_affixes && require_affixes)
 692         continue;
 693
 694       /* Check whether we should include grouping characters.
 695          We need room for a complete set or we don't insert any at all.
 696          We don't include grouping characters if decimal places were
 697          requested but they were all dropped. */
 698       add_grouping = (style->grouping != 0
 699                       && integer_digits > 3
 700                       && (format->d == 0 || decimals > 0)
 701                       && allocate_space ((integer_digits - 1) / 3,
 702                                          format->w, &width));
 703
 704       /* Format the number's magnitude. */
 705       rounder_format (r, decimals, magnitude);
 706
 707       /* Assemble number. */
 708       p = output;
 709       if (format->w > width)
 710         p = mempset (p, ' ', format->w - width);
 711       if (add_neg_prefix)
 712         p = stpcpy (p, style->neg_prefix.s);
 713       if (add_affixes)
 714         p = stpcpy (p, style->prefix.s);
 715       if (!add_grouping)
 716         p = mempcpy (p, magnitude, integer_digits);
 717       else
 718         {
 719           int i;
 720           for (i = 0; i < integer_digits; i++)
 721             {
 722               if (i > 0 && (integer_digits - i) % 3 == 0)
 723                 *p++ = style->grouping;
 724               *p++ = magnitude[i];
 725             }
 726         }
 727       if (decimals > 0)
 728         {
 729           *p++ = style->decimal;
 730           p = mempcpy (p, &magnitude[integer_digits + 1], decimals);
 731         }
 732       if (add_affixes)
 733         p = stpcpy (p, style->suffix.s);
 734       if (add_neg_prefix)
 735         p = stpcpy (p, style->neg_suffix.s);
 736       else
 737         p = mempset (p, ' ', style->neg_suffix.width);
 738
 739       assert (p >= output + format->w);
 740       assert (p <= output + format->w + style->extra_bytes);
 741       *p = '\0';
 742
 743       return true;
 744     }
 745   return false;
 746 }
 747
 748 /* Formats NUMBER into OUTPUT in scientific notation according to
 749    the style of the format specified in FORMAT. */
 750 static bool
 751 output_scientific (double number, const struct fmt_spec *format,
 752                    bool require_affixes, char *output)
 753 {
 754   const struct fmt_number_style *style =
 755     settings_get_style (format->type);
 756   int width;
 757   int fraction_width;
 758   bool add_affixes;
 759   char *p;
 760
 761   /* Allocate minimum required space. */
 762   width = 6 + style->neg_suffix.width;
 763   if (number < 0)
 764     width += style->neg_prefix.width;
 765   if (width > format->w)
 766     return false;
 767
 768   /* Check for room for prefix and suffix. */
 769   add_affixes = allocate_space (fmt_affix_width (style), format->w, &width);
 770   if (require_affixes && !add_affixes)
 771     return false;
 772
 773   /* Figure out number of characters we can use for the fraction,
 774      if any.  (If that turns out to be 1, then we'll output a
 775      decimal point without any digits following; that's what the
 776      # flag does in the call to c_snprintf, below.) */
 777   fraction_width = MIN (MIN (format->d + 1, format->w - width), 16);
 778   if (format->type != FMT_E && fraction_width == 1)
 779     fraction_width = 0;
 780   width += fraction_width;
 781
 782   /* Format (except suffix). */
 783   p = output;
 784   if (width < format->w)
 785     p = mempset (p, ' ', format->w - width);
 786   if (number < 0)
 787     p = stpcpy (p, style->neg_prefix.s);
 788   if (add_affixes)
 789     p = stpcpy (p, style->prefix.s);
 790   if (fraction_width > 0)
 791     c_snprintf (p, 64, "%#.*E", fraction_width - 1, fabs (number));
 792   else
 793     c_snprintf (p, 64, "%.0E", fabs (number));
 794
 795   /* The C locale always uses a period `.' as a decimal point.
 796      Translate to comma if necessary. */
 797   if (style->decimal != '.')
 798     {
 799       char *cp = strchr (p, '.');
 800       if (cp != NULL)
 801         *cp = style->decimal;
 802     }
 803
 804   /* Make exponent have exactly three digits, plus sign. */
 805   {
 806     char *cp = strchr (p, 'E') + 1;
 807     long int exponent = strtol (cp, NULL, 10);
 808     if (abs (exponent) > 999)
 809       return false;
 810     sprintf (cp, "%+04ld", exponent);
 811   }
 812
 813   /* Add suffixes. */
 814   p = strchr (p, '\0');
 815   if (add_affixes)
 816     p = stpcpy (p, style->suffix.s);
 817   if (number < 0)
 818     p = stpcpy (p, style->neg_suffix.s);
 819   else
 820     p = mempset (p, ' ', style->neg_suffix.width);
 821
 822   assert (p >= output + format->w);
 823   assert (p <= output + format->w + style->extra_bytes);
 824   *p = '\0';
 825
 826   return true;
 827 }
 828 \f
 829 /* Returns true if the magnitude represented by R should be
 830    rounded up when chopped off at DECIMALS decimal places, false
 831    if it should be rounded down. */
 832 static bool
 833 should_round_up (const struct rounder *r, int decimals)
 834 {
 835   int digit = r->string[r->integer_digits + decimals + 1];
 836   assert (digit >= '0' && digit <= '9');
 837   return digit >= '5';
 838 }
 839
 840 /* Initializes R for formatting the magnitude of NUMBER to no
 841    more than MAX_DECIMAL decimal places. */
 842 static void
 843 rounder_init (struct rounder *r, double number, int max_decimals)
 844 {
 845   assert (fabs (number) < 1e41);
 846   assert (max_decimals >= 0 && max_decimals <= 16);
 847   if (max_decimals == 0)
 848     {
 849       /* Fast path.  No rounding needed.
 850
 851          We append ".00" to the integer representation because
 852          round_up assumes that fractional digits are present.  */
 853       c_snprintf (r->string, 64, "%.0f.00", fabs (round (number)));
 854     }
 855   else
 856     {
 857       /* Slow path.
 858
 859          This is more difficult than it really should be because
 860          we have to make sure that numbers that are exactly
 861          halfway between two representations are always rounded
 862          away from zero.  This is not what sprintf normally does
 863          (usually it rounds to even), so we have to fake it as
 864          best we can, by formatting with extra precision and then
 865          doing the rounding ourselves.
 866
 867          We take up to two rounds to format numbers.  In the
 868          first round, we obtain 2 digits of precision beyond
 869          those requested by the user.  If those digits are
 870          exactly "50", then in a second round we format with as
 871          many digits as are significant in a "double".
 872
 873          It might be better to directly implement our own
 874          floating-point formatting routine instead of relying on
 875          the system's sprintf implementation.  But the classic
 876          Steele and White paper on printing floating-point
 877          numbers does not hint how to do what we want, and it's
 878          not obvious how to change their algorithms to do so.  It
 879          would also be a lot of work. */
 880       c_snprintf (r->string, 64, "%.*f", max_decimals + 2, fabs (number));
 881       if (!strcmp (r->string + strlen (r->string) - 2, "50"))
 882         {
 883           int binary_exponent, decimal_exponent, format_decimals;
 884           frexp (number, &binary_exponent);
 885           decimal_exponent = binary_exponent * 3 / 10;
 886           format_decimals = (DBL_DIG + 1) - decimal_exponent;
 887           if (format_decimals > max_decimals + 2)
 888             c_snprintf (r->string, 64, "%.*f", format_decimals, fabs (number));
 889         }
 890     }
 891
 892   if (r->string[0] == '0')
 893     memmove (r->string, &r->string[1], strlen (r->string));
 894
 895   r->leading_zeros = strspn (r->string, "0.");
 896   r->leading_nines = strspn (r->string, "9.");
 897   r->integer_digits = strchr (r->string, '.') - r->string;
 898   assert (r->integer_digits < 64);
 899   assert (r->integer_digits >= 0);
 900   r->negative = number < 0;
 901 }
 902
 903 /* Returns the number of characters required to format the
 904    magnitude represented by R to DECIMALS decimal places.
 905    The return value includes integer digits and a decimal point
 906    and fractional digits, if any, but it does not include any
 907    negative prefix or suffix or other affixes.
 908
 909    *INTEGER_DIGITS is set to the number of digits before the
 910    decimal point in the output, between 0 and 40.
 911
 912    If R represents a negative number and its rounded
 913    representation would include at least one nonzero digit,
 914    *NEGATIVE is set to true; otherwise, it is set to false. */
 915 static int
 916 rounder_width (const struct rounder *r, int decimals,
 917                int *integer_digits, bool *negative)
 918 {
 919   /* Calculate base measures. */
 920   int width = r->integer_digits;
 921   if (decimals > 0)
 922     width += decimals + 1;
 923   *integer_digits = r->integer_digits;
 924   *negative = r->negative;
 925
 926   /* Rounding can cause adjustments. */
 927   if (should_round_up (r, decimals))
 928     {
 929       /* Rounding up leading 9s adds a new digit (a 1). */
 930       if (r->leading_nines >= width)
 931         {
 932           width++;
 933           ++*integer_digits;
 934         }
 935     }
 936   else
 937     {
 938       /* Rounding down. */
 939       if (r->leading_zeros >= width)
 940         {
 941           /* All digits that remain after rounding are zeros.
 942              Therefore we drop the negative sign. */
 943           *negative = false;
 944           if (r->integer_digits == 0 && decimals == 0)
 945             {
 946               /* No digits at all are left.  We need to display
 947                  at least a single digit (a zero). */
 948               assert (width == 0);
 949               width++;
 950               *integer_digits = 1;
 951             }
 952         }
 953     }
 954   return width;
 955 }
 956
 957 /* Formats the magnitude represented by R into OUTPUT, rounding
 958    to DECIMALS decimal places.  Exactly as many characters as
 959    indicated by rounder_width are written.  No terminating null
 960    is appended. */
 961 static void
 962 rounder_format (const struct rounder *r, int decimals, char *output)
 963 {
 964   int base_width = r->integer_digits + (decimals > 0 ? decimals + 1 : 0);
 965   if (should_round_up (r, decimals))
 966     {
 967       if (r->leading_nines < base_width)
 968         {
 969           /* Rounding up.  This is the common case where rounding
 970              up doesn't add an extra digit. */
 971           char *p;
 972           memcpy (output, r->string, base_width);
 973           for (p = output + base_width - 1; ; p--)
 974             {
 975               assert (p >= output);
 976               if (*p == '9')
 977                 *p = '0';
 978               else if (*p >= '0' && *p <= '8')
 979                 {
 980                   (*p)++;
 981                   break;
 982                 }
 983               else
 984                 assert (*p == '.');
 985             }
 986         }
 987       else
 988         {
 989           /* Rounding up leading 9s causes the result to be a 1
 990              followed by a number of 0s, plus a decimal point. */
 991           char *p = output;
 992           *p++ = '1';
 993           p = mempset (p, '0', r->integer_digits);
 994           if (decimals > 0)
 995             {
 996               *p++ = '.';
 997               p = mempset (p, '0', decimals);
 998             }
 999           assert (p == output + base_width + 1);
1000         }
1001     }
1002   else
1003     {
1004       /* Rounding down. */
1005       if (r->integer_digits != 0 || decimals != 0)
1006         {
1007           /* Common case: just copy the digits. */
1008           memcpy (output, r->string, base_width);
1009         }
1010       else
1011         {
1012           /* No digits remain.  The output is just a zero. */
1013           output[0] = '0';
1014         }
1015     }
1016 }
1017 \f
1018 /* Helper functions. */
1019
1020 /* Returns 10**X. */
1021 static double PURE_FUNCTION
1022 power10 (int x)
1023 {
1024   static const double p[] =
1025     {
1026       1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1027       1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1028       1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
1029       1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
1030       1e40,
1031     };
1032   return x >= 0 && x < sizeof p / sizeof *p ? p[x] : pow (10.0, x);
1033 }
1034
1035 /* Returns 256**X. */
1036 static double PURE_FUNCTION
1037 power256 (int x)
1038 {
1039   static const double p[] =
1040     {
1041       1.0,
1042       256.0,
1043       65536.0,
1044       16777216.0,
1045       4294967296.0,
1046       1099511627776.0,
1047       281474976710656.0,
1048       72057594037927936.0,
1049       18446744073709551616.0
1050     };
1051   return x >= 0 && x < sizeof p / sizeof *p ? p[x] : pow (256.0, x);
1052 }
1053
1054 /* Formats non-finite NUMBER into OUTPUT according to the width
1055    given in FORMAT. */
1056 static void
1057 output_infinite (double number, const struct fmt_spec *format, char *output)
1058 {
1059   assert (!isfinite (number));
1060
1061   if (format->w >= 3)
1062     {
1063       const char *s;
1064
1065       if (isnan (number))
1066         s = "NaN";
1067       else if (isinf (number))
1068         s = number > 0 ? "+Infinity" : "-Infinity";
1069       else
1070         s = "Unknown";
1071
1072       buf_copy_str_lpad (output, format->w, s, ' ');
1073     }
1074   else
1075     output_overflow (format, output);
1076
1077   output[format->w] = '\0';
1078 }
1079
1080 /* Formats OUTPUT as a missing value for the given FORMAT. */
1081 static void
1082 output_missing (const struct fmt_spec *format, char *output)
1083 {
1084   memset (output, ' ', format->w);
1085
1086   if (format->type != FMT_N)
1087     {
1088       int dot_ofs = (format->type == FMT_PCT ? 2
1089                      : format->type == FMT_E ? 5
1090                      : 1);
1091       output[MAX (0, format->w - format->d - dot_ofs)] = '.';
1092     }
1093   else
1094     output[format->w - 1] = '.';
1095
1096   output[format->w] = '\0';
1097 }
1098
1099 /* Formats OUTPUT for overflow given FORMAT. */
1100 static void
1101 output_overflow (const struct fmt_spec *format, char *output)
1102 {
1103   memset (output, '*', format->w);
1104   output[format->w] = '\0';
1105 }
1106
1107 /* Converts the integer part of NUMBER to a packed BCD number
1108    with the given number of DIGITS in OUTPUT.  If DIGITS is odd,
1109    the least significant nibble of the final byte in OUTPUT is
1110    set to 0.  Returns true if successful, false if NUMBER is not
1111    representable.  On failure, OUTPUT is cleared to all zero
1112    bytes. */
1113 static bool
1114 output_bcd_integer (double number, int digits, char *output)
1115 {
1116   char decimal[64];
1117
1118   assert (digits < sizeof decimal);
1119
1120   output[DIV_RND_UP (digits, 2)] = '\0';
1121   if (number != SYSMIS
1122       && number >= 0.
1123       && number < power10 (digits)
1124       && c_snprintf (decimal, 64, "%0*.0f", digits, round (number)) == digits)
1125     {
1126       const char *src = decimal;
1127       int i;
1128
1129       for (i = 0; i < digits / 2; i++)
1130         {
1131           int d0 = *src++ - '0';
1132           int d1 = *src++ - '0';
1133           *output++ = (d0 << 4) + d1;
1134         }
1135       if (digits % 2)
1136         *output = (*src - '0') << 4;
1137
1138       return true;
1139     }
1140   else
1141     {
1142       memset (output, 0, DIV_RND_UP (digits, 2));
1143       return false;
1144     }
1145 }
1146
/* Writes VALUE to OUTPUT as a BYTES-byte binary integer of the
   given INTEGER_FORMAT.

   This only forwards to integer_put, reordering the arguments to
   match that function's parameter order. */
static void
output_binary_integer (uint64_t value, int bytes,
                       enum integer_format integer_format, char *output)
{
  integer_put (value, integer_format, output, bytes);
}
1155
/* Writes the BYTES bytes at DATA_ to OUTPUT as 2 * BYTES
   uppercase hexadecimal digits, high nibble first, followed by a
   null terminator.  OUTPUT therefore needs room for
   2 * BYTES + 1 characters. */
static void
output_hex (const void *data_, size_t bytes, char *output)
{
  static const char hex_digits[] = "0123456789ABCDEF";
  const uint8_t *src = data_;
  char *dst = output;
  size_t remaining;

  for (remaining = bytes; remaining > 0; remaining--)
    {
      unsigned int byte = *src++;

      dst[0] = hex_digits[byte / 16];
      dst[1] = hex_digits[byte % 16];
      dst += 2;
    }
  *dst = '\0';
}