src/code/float.lisp

   1 ;;;; This file contains the definitions of float-specific number
   2 ;;;; support (other than irrational stuff, which is in irrat.) There is
   3 ;;;; code in here that assumes there are only two float formats: IEEE
   4 ;;;; single and double. (LONG-FLOAT support has been added, but bugs
   5 ;;;; may still remain due to old code which assumes this dichotomy.)
   6
   7 ;;;; This software is part of the SBCL system. See the README file for
   8 ;;;; more information.
   9 ;;;;
  10 ;;;; This software is derived from the CMU CL system, which was
  11 ;;;; written at Carnegie Mellon University and released into the
  12 ;;;; public domain. The software is in the public domain and is
  13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
  14 ;;;; files for more information.
  15
  16 (in-package "SB!KERNEL")
  17 \f
  18 ;;;; float predicates and environment query
  19
  20 #!-sb-fluid
  21 (declaim (maybe-inline float-denormalized-p float-infinity-p float-nan-p
  22                        float-trapping-nan-p))
  23
  24 (defun float-denormalized-p (x)
  25   #!+sb-doc
  26   "Return true if the float X is denormalized."
  27   (number-dispatch ((x float))
  28     ((single-float)
  29      (and (zerop (ldb sb!vm:single-float-exponent-byte (single-float-bits x)))
  30           (not (zerop x))))
  31     ((double-float)
  32      (and (zerop (ldb sb!vm:double-float-exponent-byte
  33                       (double-float-high-bits x)))
  34           (not (zerop x))))
  35     #!+(and long-float x86)
  36     ((long-float)
  37      (and (zerop (ldb sb!vm:long-float-exponent-byte (long-float-exp-bits x)))
  38           (not (zerop x))))))
  39
;;; Define NAME as a one-argument predicate on floats that can only be
;;; true for values whose exponent field is above the largest normal
;;; exponent (i.e. infinities and NaNs).
;;;
;;; DOC is the docstring (only used when the SB-DOC feature is present).
;;; SINGLE, DOUBLE and (on x86 with LONG-FLOAT) LONG are forms evaluated
;;; only after the exponent test has succeeded. They are spliced in
;;; unhygienically and may refer to these bindings:
;;;   SINGLE: BITS       -- result of SINGLE-FLOAT-BITS
;;;   DOUBLE: HI, LO     -- high and low words of the double
;;;   LONG:   EXP, HI, LO -- exponent word and significand words
(defmacro !define-float-dispatching-function
    (name doc single double #!+(and long-float x86) long)
  (declare (ignorable doc))
  `(defun ,name (x)
     #!+sb-doc ,doc
     (number-dispatch ((x float))
       ((single-float)
        (let ((bits (single-float-bits x)))
          ;; Exponent above the normal range, then the format-specific test.
          (and (> (ldb sb!vm:single-float-exponent-byte bits)
                  sb!vm:single-float-normal-exponent-max)
               ,single)))
       ((double-float)
        (let ((hi (double-float-high-bits x))
              (lo (double-float-low-bits x)))
          ;; Not every DOUBLE form looks at the low word.
          (declare (ignorable lo))
          (and (> (ldb sb!vm:double-float-exponent-byte hi)
                  sb!vm:double-float-normal-exponent-max)
               ,double)))
       #!+(and long-float x86)
       ((long-float)
        (let ((exp (long-float-exp-bits x))
              (hi (long-float-high-bits x))
              (lo (long-float-low-bits x)))
          (declare (ignorable lo))
          (and (> (ldb sb!vm:long-float-exponent-byte exp)
                  sb!vm:long-float-normal-exponent-max)
               ,long))))))
  67
;;; Infinity: exponent above the normal range (checked by the defining
;;; macro) and every significand bit zero. For the two-word formats both
;;; the significand part of HI and all of LO must be zero.
(!define-float-dispatching-function float-infinity-p
  "Return true if the float X is an infinity (+ or -)."
  (zerop (ldb sb!vm:single-float-significand-byte bits))
  (and (zerop (ldb sb!vm:double-float-significand-byte hi))
       (zerop lo))
  #!+(and long-float x86)
  (and (zerop (ldb sb!vm:long-float-significand-byte hi))
       (zerop lo)))
  76
;;; NaN: exponent above the normal range (checked by the defining macro)
;;; and at least one significand bit set -- the exact complement of the
;;; FLOAT-INFINITY-P significand tests.
(!define-float-dispatching-function float-nan-p
  "Return true if the float X is a NaN (Not a Number)."
  (not (zerop (ldb sb!vm:single-float-significand-byte bits)))
  (or (not (zerop (ldb sb!vm:double-float-significand-byte hi)))
      (not (zerop lo)))
  #!+(and long-float x86)
  (or (not (zerop (ldb sb!vm:long-float-significand-byte hi)))
      (not (zerop lo))))
  85
;;; Trapping-NaN test: exponent above the normal range (checked by the
;;; defining macro) plus a test of the trapping-nan-bit in the
;;; significand. The polarity of that bit is selected at build time by
;;; the MIPS/HPPA reader conditionals below; exactly one single-float
;;; form and one double-float form survive reading. The long-float form
;;; has only the "bit clear" variant.
(!define-float-dispatching-function float-trapping-nan-p
  "Return true if the float X is a trapping NaN (Not a Number)."
  ;; HPPA (and apparently MIPS) have trapping NaNs (SNaNs) with the
  ;; trapping-nan-bit SET.  PPC, SPARC, Alpha, and x86 (and presumably
  ;; x86-64, ARM, and ARM64) have trapping NaNs (SNaNs) with the
  ;; trapping-nan-bit CLEAR.  Note that the given implementation
  ;; considers infinities to be FLOAT-TRAPPING-NAN-P on most
  ;; architectures.
  ;; Single-float form, "bit clear means trapping" variant.
  #!-(or mips hppa)
  (zerop (logand (ldb sb!vm:single-float-significand-byte bits)
                 sb!vm:single-float-trapping-nan-bit))
  ;; Single-float form, "bit set means trapping" variant.
  #!+(or mips hppa)
  (not (zerop (logand (ldb sb!vm:single-float-significand-byte bits)
                      sb!vm:single-float-trapping-nan-bit)))
  ;; Double-float forms; the trapping-nan-bit is tested in the high word.
  #!-(or mips hppa)
  (zerop (logand (ldb sb!vm:double-float-significand-byte hi)
                 sb!vm:double-float-trapping-nan-bit))
  #!+(or mips hppa)
  (not (zerop (logand (ldb sb!vm:double-float-significand-byte hi)
                      sb!vm:double-float-trapping-nan-bit)))
  #!+(and long-float x86)
  (zerop (logand (ldb sb!vm:long-float-significand-byte hi)
                 sb!vm:long-float-trapping-nan-bit)))
 109
 110 ;;; If denormalized, use a subfunction from INTEGER-DECODE-FLOAT to find the
 111 ;;; actual exponent (and hence how denormalized it is), otherwise we just
 112 ;;; return the number of digits or 0.
 113 #!-sb-fluid (declaim (maybe-inline float-precision))
 114 (defun float-precision (f)
 115   #!+sb-doc
 116   "Return a non-negative number of significant digits in its float argument.
 117   Will be less than FLOAT-DIGITS if denormalized or zero."
 118   (declare (explicit-check))
 119   (macrolet ((frob (digits bias decode)
 120                `(cond ((zerop f) 0)
 121                       ((float-denormalized-p f)
 122                        (multiple-value-bind (ignore exp) (,decode f)
 123                          (declare (ignore ignore))
 124                          (truly-the fixnum
 125                                     (+ ,digits (1- ,digits) ,bias exp))))
 126                       (t
 127                        ,digits))))
 128     (number-dispatch ((f float))
 129       ((single-float)
 130        (frob sb!vm:single-float-digits sb!vm:single-float-bias
 131          integer-decode-single-denorm))
 132       ((double-float)
 133        (frob sb!vm:double-float-digits sb!vm:double-float-bias
 134          integer-decode-double-denorm))
 135       #!+long-float
 136       ((long-float)
 137        (frob sb!vm:long-float-digits sb!vm:long-float-bias
 138          integer-decode-long-denorm)))))
 139
 140 (defun float-sign (float1 &optional (float2 (float 1 float1)))
 141   #!+sb-doc
 142   "Return a floating-point number that has the same sign as
 143    FLOAT1 and, if FLOAT2 is given, has the same absolute value
 144    as FLOAT2."
 145   (declare (float float1 float2) (explicit-check))
 146   (* (if (etypecase float1
 147            (single-float (minusp (single-float-bits float1)))
 148            (double-float (minusp (double-float-high-bits float1)))
 149            #!+long-float
 150            (long-float (minusp (long-float-exp-bits float1))))
 151          (float -1 float1)
 152          (float 1 float1))
 153      (abs float2)))
 154
;;; Return the significand digit count for the float format named by the
;;; symbol FORMAT. SHORT-FLOAT is treated as SINGLE-FLOAT; when the
;;; LONG-FLOAT feature is absent, LONG-FLOAT is treated as DOUBLE-FLOAT.
;;; Any other FORMAT falls through ECASE and signals an error.
(defun float-format-digits (format)
  (ecase format
    ((short-float single-float) sb!vm:single-float-digits)
    ((double-float #!-long-float long-float) sb!vm:double-float-digits)
    #!+long-float
    (long-float sb!vm:long-float-digits)))
 161
#!-sb-fluid (declaim (inline float-digits float-radix))

;;; Return the number of significand digits of F's float format. The
;;; result depends only on the type of F, never on its value.
(defun float-digits (f)
  (declare (explicit-check))
  (number-dispatch ((f float))
    ((single-float) sb!vm:single-float-digits)
    ((double-float) sb!vm:double-float-digits)
    #!+long-float
    ((long-float) sb!vm:long-float-digits)))
 171
;;; The radix is the constant 2 for every float; X is only declared to
;;; be a FLOAT and is otherwise ignored.
(defun float-radix (x)
  #!+sb-doc
  "Return (as an integer) the radix b of its floating-point argument."
  (declare (ignore x) (type float x))
  2)
 177 \f
 178 ;;;; INTEGER-DECODE-FLOAT and DECODE-FLOAT
 179
 180 #!-sb-fluid
 181 (declaim (maybe-inline integer-decode-single-float
 182                        integer-decode-double-float))
 183
 184 ;;; Handle the denormalized case of INTEGER-DECODE-FLOAT for SINGLE-FLOAT.
 185 (defun integer-decode-single-denorm (x)
 186   (declare (type single-float x))
 187   (let* ((bits (single-float-bits (abs x)))
 188          (sig (ash (ldb sb!vm:single-float-significand-byte bits) 1))
 189          (extra-bias 0))
 190     (declare (type (unsigned-byte 24) sig)
 191              (type (integer 0 23) extra-bias))
 192     (loop
 193       (unless (zerop (logand sig sb!vm:single-float-hidden-bit))
 194         (return))
 195       (setq sig (ash sig 1))
 196       (incf extra-bias))
 197     (values sig
 198             (- (- sb!vm:single-float-bias)
 199                sb!vm:single-float-digits
 200                extra-bias)
 201             (if (minusp (float-sign x)) -1 1))))
 202
 203 ;;; Handle the single-float case of INTEGER-DECODE-FLOAT. If an infinity or
 204 ;;; NaN, error. If a denorm, call i-d-s-DENORM to handle it.
 205 (defun integer-decode-single-float (x)
 206   (declare (single-float x))
 207   (let* ((bits (single-float-bits (abs x)))
 208          (exp (ldb sb!vm:single-float-exponent-byte bits))
 209          (sig (ldb sb!vm:single-float-significand-byte bits))
 210          (sign (if (minusp (float-sign x)) -1 1))
 211          (biased (- exp sb!vm:single-float-bias sb!vm:single-float-digits)))
 212     (declare (fixnum biased))
 213     (unless (<= exp sb!vm:single-float-normal-exponent-max)
 214       (error "can't decode NaN or infinity: ~S" x))
 215     (cond ((and (zerop exp) (zerop sig))
 216            (values 0 biased sign))
 217           ((< exp sb!vm:single-float-normal-exponent-min)
 218            (integer-decode-single-denorm x))
 219           (t
 220            (values (logior sig sb!vm:single-float-hidden-bit) biased sign)))))
 221
 222 ;;; like INTEGER-DECODE-SINGLE-DENORM, only doubly so
 223 (defun integer-decode-double-denorm (x)
 224   (declare (type double-float x))
 225   (let* ((high-bits (double-float-high-bits (abs x)))
 226          (sig-high (ldb sb!vm:double-float-significand-byte high-bits))
 227          (low-bits (double-float-low-bits x))
 228          (sign (if (minusp (float-sign x)) -1 1))
 229          (biased (- (- sb!vm:double-float-bias) sb!vm:double-float-digits)))
 230     (if (zerop sig-high)
 231         (let ((sig low-bits)
 232               (extra-bias (- sb!vm:double-float-digits 33))
 233               (bit (ash 1 31)))
 234           (declare (type (unsigned-byte 32) sig) (fixnum extra-bias))
 235           (loop
 236             (unless (zerop (logand sig bit)) (return))
 237             (setq sig (ash sig 1))
 238             (incf extra-bias))
 239           (values (ash sig (- sb!vm:double-float-digits 32))
 240                   (truly-the fixnum (- biased extra-bias))
 241                   sign))
 242         (let ((sig (ash sig-high 1))
 243               (extra-bias 0))
 244           (declare (type (unsigned-byte 32) sig) (fixnum extra-bias))
 245           (loop
 246             (unless (zerop (logand sig sb!vm:double-float-hidden-bit))
 247               (return))
 248             (setq sig (ash sig 1))
 249             (incf extra-bias))
 250           (values (logior (ash sig 32) (ash low-bits (1- extra-bias)))
 251                   (truly-the fixnum (- biased extra-bias))
 252                   sign)))))
 253
 254 ;;; like INTEGER-DECODE-SINGLE-FLOAT, only doubly so
 255 (defun integer-decode-double-float (x)
 256   (declare (double-float x))
 257   (let* ((abs (abs x))
 258          (hi (double-float-high-bits abs))
 259          (lo (double-float-low-bits abs))
 260          (exp (ldb sb!vm:double-float-exponent-byte hi))
 261          (sig (ldb sb!vm:double-float-significand-byte hi))
 262          (sign (if (minusp (float-sign x)) -1 1))
 263          (biased (- exp sb!vm:double-float-bias sb!vm:double-float-digits)))
 264     (declare (fixnum biased))
 265     (unless (<= exp sb!vm:double-float-normal-exponent-max)
 266       (error "Can't decode NaN or infinity: ~S." x))
 267     (cond ((and (zerop exp) (zerop sig) (zerop lo))
 268            (values 0 biased sign))
 269           ((< exp sb!vm:double-float-normal-exponent-min)
 270            (integer-decode-double-denorm x))
 271           (t
 272            (values
 273             (logior (ash (logior (ldb sb!vm:double-float-significand-byte hi)
 274                                  sb!vm:double-float-hidden-bit)
 275                          32)
 276                     lo)
 277             biased sign)))))
 278
 279 #!+(and long-float x86)
 280 (defun integer-decode-long-denorm (x)
 281   (declare (type long-float x))
 282   (let* ((high-bits (long-float-high-bits (abs x)))
 283          (sig-high (ldb sb!vm:long-float-significand-byte high-bits))
 284          (low-bits (long-float-low-bits x))
 285          (sign (if (minusp (float-sign x)) -1 1))
 286          (biased (- (- sb!vm:long-float-bias) sb!vm:long-float-digits)))
 287     (if (zerop sig-high)
 288         (let ((sig low-bits)
 289               (extra-bias (- sb!vm:long-float-digits 33))
 290               (bit (ash 1 31)))
 291           (declare (type (unsigned-byte 32) sig) (fixnum extra-bias))
 292           (loop
 293             (unless (zerop (logand sig bit)) (return))
 294             (setq sig (ash sig 1))
 295             (incf extra-bias))
 296           (values (ash sig (- sb!vm:long-float-digits 32))
 297                   (truly-the fixnum (- biased extra-bias))
 298                   sign))
 299         (let ((sig (ash sig-high 1))
 300               (extra-bias 0))
 301           (declare (type (unsigned-byte 32) sig) (fixnum extra-bias))
 302           (loop
 303             (unless (zerop (logand sig sb!vm:long-float-hidden-bit))
 304               (return))
 305             (setq sig (ash sig 1))
 306             (incf extra-bias))
 307           (values (logior (ash sig 32) (ash low-bits (1- extra-bias)))
 308                   (truly-the fixnum (- biased extra-bias))
 309                   sign)))))
 310
 311 #!+(and long-float x86)
 312 (defun integer-decode-long-float (x)
 313   (declare (long-float x))
 314   (let* ((hi (long-float-high-bits x))
 315          (lo (long-float-low-bits x))
 316          (exp-bits (long-float-exp-bits x))
 317          (exp (ldb sb!vm:long-float-exponent-byte exp-bits))
 318          (sign (if (minusp exp-bits) -1 1))
 319          (biased (- exp sb!vm:long-float-bias sb!vm:long-float-digits)))
 320     (declare (fixnum biased))
 321     (unless (<= exp sb!vm:long-float-normal-exponent-max)
 322       (error "can't decode NaN or infinity: ~S" x))
 323     (cond ((and (zerop exp) (zerop hi) (zerop lo))
 324            (values 0 biased sign))
 325           ((< exp sb!vm:long-float-normal-exponent-min)
 326            (integer-decode-long-denorm x))
 327           (t
 328            (values (logior (ash hi 32) lo) biased sign)))))
 329
;;; Dispatch to the correct type-specific i-d-f function.
;;; The type-specific functions signal an error for infinities and NaNs,
;;; so this function does too.
(defun integer-decode-float (x)
  #!+sb-doc
  "Return three values:
   1) an integer representation of the significand.
   2) the exponent for the power of 2 that the significand must be multiplied
      by to get the actual value. This differs from the DECODE-FLOAT exponent
      by FLOAT-DIGITS, since the significand has been scaled to have all its
      digits before the radix point.
   3) -1 or 1 (i.e. the sign of the argument.)"
  (declare (explicit-check))
  (number-dispatch ((x float))
    ((single-float)
     (integer-decode-single-float x))
    ((double-float)
     (integer-decode-double-float x))
    #!+long-float
    ((long-float)
     (integer-decode-long-float x))))
 349
 350 #!-sb-fluid (declaim (maybe-inline decode-single-float decode-double-float))
 351
 352 ;;; Handle the denormalized case of DECODE-SINGLE-FLOAT. We call
 353 ;;; INTEGER-DECODE-SINGLE-DENORM and then make the result into a float.
 354 (defun decode-single-denorm (x)
 355   (declare (type single-float x))
 356   (multiple-value-bind (sig exp sign) (integer-decode-single-denorm x)
 357     (values (make-single-float
 358              (dpb sig sb!vm:single-float-significand-byte
 359                   (dpb sb!vm:single-float-bias
 360                        sb!vm:single-float-exponent-byte
 361                        0)))
 362             (truly-the fixnum (+ exp sb!vm:single-float-digits))
 363             (float sign x))))
 364
 365 ;;; Handle the single-float case of DECODE-FLOAT. If an infinity or NaN,
 366 ;;; error. If a denorm, call d-s-DENORM to handle it.
 367 (defun decode-single-float (x)
 368   (declare (single-float x))
 369   (let* ((bits (single-float-bits (abs x)))
 370          (exp (ldb sb!vm:single-float-exponent-byte bits))
 371          (sign (float-sign x))
 372          (biased (truly-the single-float-exponent
 373                             (- exp sb!vm:single-float-bias))))
 374     (unless (<= exp sb!vm:single-float-normal-exponent-max)
 375       (error "can't decode NaN or infinity: ~S" x))
 376     (cond ((zerop x)
 377            (values 0.0f0 biased sign))
 378           ((< exp sb!vm:single-float-normal-exponent-min)
 379            (decode-single-denorm x))
 380           (t
 381            (values (make-single-float
 382                     (dpb sb!vm:single-float-bias
 383                          sb!vm:single-float-exponent-byte
 384                          bits))
 385                    biased sign)))))
 386
 387 ;;; like DECODE-SINGLE-DENORM, only doubly so
 388 (defun decode-double-denorm (x)
 389   (declare (double-float x))
 390   (multiple-value-bind (sig exp sign) (integer-decode-double-denorm x)
 391     (values (make-double-float
 392              (dpb (logand (ash sig -32) (lognot sb!vm:double-float-hidden-bit))
 393                   sb!vm:double-float-significand-byte
 394                   (dpb sb!vm:double-float-bias
 395                        sb!vm:double-float-exponent-byte 0))
 396              (ldb (byte 32 0) sig))
 397             (truly-the fixnum (+ exp sb!vm:double-float-digits))
 398             (float sign x))))
 399
 400 ;;; like DECODE-SINGLE-FLOAT, only doubly so
 401 (defun decode-double-float (x)
 402   (declare (double-float x))
 403   (let* ((abs (abs x))
 404          (hi (double-float-high-bits abs))
 405          (lo (double-float-low-bits abs))
 406          (exp (ldb sb!vm:double-float-exponent-byte hi))
 407          (sign (float-sign x))
 408          (biased (truly-the double-float-exponent
 409                             (- exp sb!vm:double-float-bias))))
 410     (unless (<= exp sb!vm:double-float-normal-exponent-max)
 411       (error "can't decode NaN or infinity: ~S" x))
 412     (cond ((zerop x)
 413            (values 0.0d0 biased sign))
 414           ((< exp sb!vm:double-float-normal-exponent-min)
 415            (decode-double-denorm x))
 416           (t
 417            (values (make-double-float
 418                     (dpb sb!vm:double-float-bias
 419                          sb!vm:double-float-exponent-byte hi)
 420                     lo)
 421                    biased sign)))))
 422
;;; Denormalized case of DECODE-LONG-FLOAT. The integer significand from
;;; INTEGER-DECODE-LONG-DENORM is split into its upper bits and its low
;;; 32 bits and rebuilt as a long float whose exponent word is the bias;
;;; the integer exponent is moved up by the digit count to match.
#!+(and long-float x86)
(defun decode-long-denorm (x)
  (declare (long-float x))
  (multiple-value-bind (sig exp sign) (integer-decode-long-denorm x)
    (values (make-long-float sb!vm:long-float-bias (ash sig -32)
                             (ldb (byte 32 0) sig))
            (truly-the fixnum (+ exp sb!vm:long-float-digits))
            (float sign x))))
 431
 432 #!+(and long-float x86)
 433 (defun decode-long-float (x)
 434   (declare (long-float x))
 435   (let* ((hi (long-float-high-bits x))
 436          (lo (long-float-low-bits x))
 437          (exp-bits (long-float-exp-bits x))
 438          (exp (ldb sb!vm:long-float-exponent-byte exp-bits))
 439          (sign (if (minusp exp-bits) -1l0 1l0))
 440          (biased (truly-the long-float-exponent
 441                             (- exp sb!vm:long-float-bias))))
 442     (unless (<= exp sb!vm:long-float-normal-exponent-max)
 443       (error "can't decode NaN or infinity: ~S" x))
 444     (cond ((zerop x)
 445            (values 0.0l0 biased sign))
 446           ((< exp sb!vm:long-float-normal-exponent-min)
 447            (decode-long-denorm x))
 448           (t
 449            (values (make-long-float
 450                     (dpb sb!vm:long-float-bias sb!vm:long-float-exponent-byte
 451                          exp-bits)
 452                     hi
 453                     lo)
 454                    biased sign)))))
 455
;;; Dispatch to the appropriate type-specific function.
;;; The type-specific functions signal an error for infinities and NaNs.
;;; Note that for a zero argument they return a zero significand, which
;;; is outside the [0.5, 1.0) range the docstring describes.
(defun decode-float (f)
  #!+sb-doc
  "Return three values:
   1) a floating-point number representing the significand. This is always
      between 0.5 (inclusive) and 1.0 (exclusive).
   2) an integer representing the exponent.
   3) -1.0 or 1.0 (i.e. the sign of the argument.)"
  (declare (explicit-check))
  (number-dispatch ((f float))
    ((single-float)
     (decode-single-float f))
    ((double-float)
     (decode-double-float f))
    #!+long-float
    ((long-float)
     (decode-long-float f))))
 473 \f
 474 ;;;; SCALE-FLOAT
 475
#!-sb-fluid (declaim (maybe-inline scale-single-float scale-double-float))

;;; Handle float scaling where the X is denormalized or the result is
;;; denormalized or underflows to 0.
;;;
;;; X is decoded with INTEGER-DECODE-FLOAT (so it must not be an
;;; infinity or NaN, which that function rejects) and the biased
;;; exponent of the scaled result is computed. Only SINGLE-FLOAT and
;;; DOUBLE-FLOAT are handled by the ETYPECASEs below.
(defun scale-float-maybe-underflow (x exp)
  (multiple-value-bind (sig old-exp) (integer-decode-float x)
    (let* ((digits (float-digits x))
           ;; Biased exponent field the scaled result would need.
           (new-exp (+ exp old-exp digits
                       (etypecase x
                         (single-float sb!vm:single-float-bias)
                         (double-float sb!vm:double-float-bias))))
           ;; Sign bit for the -FROM-BITS constructors: 1 for negative.
           (sign (if (minusp (float-sign x)) 1 0)))
      (cond
       ;; Result is below the normal range: denormal or zero.
       ((< new-exp
           (etypecase x
             (single-float sb!vm:single-float-normal-exponent-min)
             (double-float sb!vm:double-float-normal-exponent-min)))
        ;; Signal first if the corresponding traps are enabled.
        (when (sb!vm:current-float-trap :inexact)
          (error 'floating-point-inexact :operation 'scale-float
                 :operands (list x exp)))
        (when (sb!vm:current-float-trap :underflow)
          (error 'floating-point-underflow :operation 'scale-float
                 :operands (list x exp)))
        ;; SHIFT is non-positive here; it is how far the significand
        ;; must move right to fit a zero exponent field.
        (let ((shift (1- new-exp)))
          (if (< shift (- (1- digits)))
              ;; Every significand bit would be shifted out: signed zero.
              (float-sign x 0.0)
              ;; Build a denormal; ASH discards the shifted-out bits.
              (etypecase x
                (single-float (single-from-bits sign 0 (ash sig shift)))
                (double-float (double-from-bits sign 0 (ash sig shift)))))))
       ;; Result is representable with a normal exponent field.
       (t
        (etypecase x
          (single-float (single-from-bits sign new-exp sig))
          (double-float (double-from-bits sign new-exp sig))))))))
 509
 510 ;;; Called when scaling a float overflows, or the original float was a
 511 ;;; NaN or infinity. If overflow errors are trapped, then error,
 512 ;;; otherwise return the appropriate infinity. If a NaN, signal or not
 513 ;;; as appropriate.
 514 (defun scale-float-maybe-overflow (x exp)
 515   (cond
 516    ((float-infinity-p x)
 517     ;; Infinity is infinity, no matter how small...
 518     x)
 519    ((float-nan-p x)
 520     (when (and (float-trapping-nan-p x)
 521                (sb!vm:current-float-trap :invalid))
 522       (error 'floating-point-invalid-operation :operation 'scale-float
 523              :operands (list x exp)))
 524     x)
 525    (t
 526     (when (sb!vm:current-float-trap :overflow)
 527       (error 'floating-point-overflow :operation 'scale-float
 528              :operands (list x exp)))
 529     (when (sb!vm:current-float-trap :inexact)
 530       (error 'floating-point-inexact :operation 'scale-float
 531              :operands (list x exp)))
 532     (* (float-sign x)
 533        (etypecase x
 534          (single-float
 535           ;; SINGLE-FLOAT-POSITIVE-INFINITY
 536           (single-from-bits 0 (1+ sb!vm:single-float-normal-exponent-max) 0))
 537          (double-float
 538           ;; DOUBLE-FLOAT-POSITIVE-INFINITY
 539           (double-from-bits 0 (1+ sb!vm:double-float-normal-exponent-max) 0)))))))
 540
 541 ;;; Scale a single or double float, calling the correct over/underflow
 542 ;;; functions.
 543 (defun scale-single-float (x exp)
 544   (declare (single-float x) (integer exp))
 545   (etypecase exp
 546     (fixnum
 547      (let* ((bits (single-float-bits x))
 548             (old-exp (ldb sb!vm:single-float-exponent-byte bits))
 549             (new-exp (+ old-exp exp)))
 550        (cond
 551          ((zerop x) x)
 552          ((or (< old-exp sb!vm:single-float-normal-exponent-min)
 553               (< new-exp sb!vm:single-float-normal-exponent-min))
 554           (scale-float-maybe-underflow x exp))
 555          ((or (> old-exp sb!vm:single-float-normal-exponent-max)
 556               (> new-exp sb!vm:single-float-normal-exponent-max))
 557           (scale-float-maybe-overflow x exp))
 558          (t
 559           (make-single-float (dpb new-exp
 560                                   sb!vm:single-float-exponent-byte
 561                                   bits))))))
 562     (unsigned-byte (scale-float-maybe-overflow x exp))
 563     ((integer * 0) (scale-float-maybe-underflow x exp))))
 564 (defun scale-double-float (x exp)
 565   (declare (double-float x) (integer exp))
 566   (etypecase exp
 567     (fixnum
 568      (let* ((hi (double-float-high-bits x))
 569             (lo (double-float-low-bits x))
 570             (old-exp (ldb sb!vm:double-float-exponent-byte hi))
 571             (new-exp (+ old-exp exp)))
 572        (cond
 573          ((zerop x) x)
 574          ((or (< old-exp sb!vm:double-float-normal-exponent-min)
 575               (< new-exp sb!vm:double-float-normal-exponent-min))
 576           (scale-float-maybe-underflow x exp))
 577          ((or (> old-exp sb!vm:double-float-normal-exponent-max)
 578               (> new-exp sb!vm:double-float-normal-exponent-max))
 579           (scale-float-maybe-overflow x exp))
 580          (t
 581           (make-double-float (dpb new-exp sb!vm:double-float-exponent-byte hi)
 582                              lo)))))
 583     (unsigned-byte (scale-float-maybe-overflow x exp))
 584     ((integer * 0) (scale-float-maybe-underflow x exp))))
 585
;;; LONG-FLOAT entry point used by SCALE-FLOAT's dispatch.
;;; NOTE(review): the body calls SCALE-FLOAT, which dispatches long
;;; floats straight back here. Presumably the compiler open-codes
;;; SCALE-FLOAT when the argument is declared LONG-FLOAT, so this does
;;; not recurse -- confirm against the x86 float transforms/VOPs.
#!+(and x86 long-float)
(defun scale-long-float (x exp)
  (declare (long-float x) (integer exp))
  (scale-float x exp))
 590
;;; Dispatch to the correct type-specific scale-float function.
;;; F must be a float; EX is passed through to the type-specific
;;; function, which declares it to be an INTEGER.
(defun scale-float (f ex)
  #!+sb-doc
  "Return the value (* f (expt (float 2 f) ex)), but with no unnecessary loss
  of precision or overflow."
  (declare (explicit-check))
  (number-dispatch ((f float))
    ((single-float)
     (scale-single-float f ex))
    ((double-float)
     (scale-double-float f ex))
    #!+long-float
    ((long-float)
     (scale-long-float f ex))))
 605 \f
 606 ;;;; converting to/from floats
 607
 608 (defun float (number &optional (other () otherp))
 609   #!+sb-doc
 610   "Converts any REAL to a float. If OTHER is not provided, it returns a
 611   SINGLE-FLOAT if NUMBER is not already a FLOAT. If OTHER is provided, the
 612   result is the same float format as OTHER."
 613   (declare (explicit-check))
 614   (if otherp
 615       (number-dispatch ((number real) (other float))
 616         (((foreach rational single-float double-float #!+long-float long-float)
 617           (foreach single-float double-float #!+long-float long-float))
 618          (coerce number '(dispatch-type other))))
 619       (if (floatp number)
 620           number
 621           (coerce number 'single-float))))
 622
;;; Define the one-argument converters %SINGLE-FLOAT, %DOUBLE-FLOAT and
;;; (with the LONG-FLOAT feature) %LONG-FLOAT. Each converts a REAL to
;;; the named float type: floats and fixnums go through COERCE, bignums
;;; through BIGNUM-TO-FLOAT, and ratios through FLOAT-RATIO.
(macrolet ((frob (name type)
             `(defun ,name (x)
                (number-dispatch ((x real))
                  (((foreach single-float double-float #!+long-float long-float
                             fixnum))
                   (coerce x ',type))
                  ((bignum)
                   (bignum-to-float x ',type))
                  ((ratio)
                   (float-ratio x ',type))))))
  (frob %single-float single-float)
  (frob %double-float double-float)
  #!+long-float
  (frob %long-float long-float))
 637
;;; Convert a ratio to a float. We avoid any rounding error by doing an
;;; integer division. Accuracy is important to preserve print-read
;;; consistency, since this is ultimately how the reader reads a float. We
;;; scale the numerator by a power of two until the division results in the
;;; desired number of fraction bits, then do round-to-nearest.
;;;
;;; X is the ratio; FORMAT is a float format symbol accepted by
;;; FLOAT-FORMAT-DIGITS. SCALE accumulates the power of two that must be
;;; applied (via SCALE-FLOAT) to the float built from the quotient bits.
(defun float-ratio (x format)
  (let* ((signed-num (numerator x))
         (plusp (plusp signed-num))
         ;; Work on the magnitude; the sign is reapplied in FLOATIT.
         (num (if plusp signed-num (- signed-num)))
         (den (denominator x))
         (digits (float-format-digits format))
         (scale 0))
    (declare (fixnum digits scale))
    ;; Strip any trailing zeros from the denominator and move it into the scale
    ;; factor (to minimize the size of the operands.)
    ;; (LOGXOR DEN (1- DEN)) has ones in the trailing-zero positions and
    ;; at the lowest set bit, so its length minus one is the zero count.
    (let ((den-twos (1- (integer-length (logxor den (1- den))))))
      (declare (fixnum den-twos))
      (decf scale den-twos)
      (setq den (ash den (- den-twos))))
    ;; Guess how much we need to scale by from the magnitudes of the numerator
    ;; and denominator. We want one extra bit for a guard bit.
    (let* ((num-len (integer-length num))
           (den-len (integer-length den))
           (delta (- den-len num-len))
           (shift (1+ (the fixnum (+ delta digits))))
           (shifted-num (ash num shift)))
      (declare (fixnum delta shift))
      (decf scale delta)
      (labels ((float-and-scale (bits)
                 ;; BITS still carries the guard bit; drop it first. If
                 ;; rounding carried into one bit too many, drop one more
                 ;; bit and compensate in the scale.
                 (let* ((bits (ash bits -1))
                        (len (integer-length bits)))
                   (cond ((> len digits)
                          (aver (= len (the fixnum (1+ digits))))
                          (scale-float (floatit (ash bits -1)) (1+ scale)))
                         (t
                          (scale-float (floatit bits) scale)))))
               (floatit (bits)
                 ;; Build a float of FORMAT from the significand BITS
                 ;; with the bias as exponent field and the ratio's sign.
                 (let ((sign (if plusp 0 1)))
                   (case format
                     (single-float
                      (single-from-bits sign sb!vm:single-float-bias bits))
                     (double-float
                      (double-from-bits sign sb!vm:double-float-bias bits))
                     #!+long-float
                     (long-float
                      (long-from-bits sign sb!vm:long-float-bias bits))))))
        (loop
          (multiple-value-bind (fraction-and-guard rem)
              (truncate shifted-num den)
            ;; EXTRA is how many bits the quotient has beyond DIGITS; we
            ;; want exactly one (the guard bit).
            (let ((extra (- (integer-length fraction-and-guard) digits)))
              (declare (fixnum extra))
              (cond ((/= extra 1)
                     ;; Too many bits: fall through to halve the
                     ;; numerator and try again. Too few is not expected.
                     (aver (> extra 1)))
                    ((oddp fraction-and-guard)
                     ;; Guard bit set: at or above the halfway point.
                     (return
                      (if (zerop rem)
                          ;; Exact tie: round to even, based on the
                          ;; lowest bit that will be kept.
                          (float-and-scale
                           (if (zerop (logand fraction-and-guard 2))
                               fraction-and-guard
                               (1+ fraction-and-guard)))
                          ;; Above halfway: round up.
                          (float-and-scale (1+ fraction-and-guard)))))
                    (t
                     ;; Guard bit clear: below halfway, round down.
                     (return (float-and-scale fraction-and-guard)))))
            (setq shifted-num (ash shifted-num -1))
            (incf scale)))))))
 703
 704 ;;; These might be useful if we ever have a machine without float/integer
 705 ;;; conversion hardware. For now, we'll use special ops that
 706 ;;; uninterruptibly frob the rounding modes & do ieee round-to-integer.
 707 #+nil
 708 (progn
 709   ;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
 710   ;; and the result is known to be a fixnum. We can avoid some generic
 711   ;; arithmetic in this case.
       ;;
       ;; X is taken apart into its raw exponent and significand fields and
       ;; the significand is shifted into place by hand. The result is negated
       ;; when BITS is negative (presumably: when the sign bit is set, i.e.
       ;; SINGLE-FLOAT-BITS returns a signed integer -- confirm).
 712   (defun %unary-truncate-single-float/fixnum (x)
 713     (declare (single-float x) (values fixnum))
 714     (locally (declare (optimize (speed 3) (safety 0)))
 715       (let* ((bits (single-float-bits x))
 716              (exp (ldb sb!vm:single-float-exponent-byte bits))
                  ;; Significand field with the hidden bit ORed back in.
 717              (frac (logior (ldb sb!vm:single-float-significand-byte bits)
 718                            sb!vm:single-float-hidden-bit))
                  ;; Shift count for FRAC: the exponent field less the
                  ;; significand width and the bias.
 719              (shift (- exp sb!vm:single-float-digits sb!vm:single-float-bias)))
             ;; An exponent field above the largest normal one is an error
             ;; (presumably infinities and NaNs -- confirm).
 720         (when (> exp sb!vm:single-float-normal-exponent-max)
 721           (error 'floating-point-invalid-operation :operator 'truncate
 722                  :operands (list x)))
             ;; A right shift by at least SINGLE-FLOAT-DIGITS leaves nothing of
             ;; FRAC (assuming FRAC is that many bits wide), so the integer
             ;; part is 0.
 723         (if (<= shift (- sb!vm:single-float-digits))
 724             0
 725             (let ((res (ash frac shift)))
                   ;; NOTE(review): (UNSIGNED-BYTE 31) looks like it assumes
                   ;; 32-bit words; confirm before re-enabling this code.
 726               (declare (type (unsigned-byte 31) res))
 727               (if (minusp bits)
 728                   (- res)
 729                   res))))))
 730   ;; Double-float version of this operation (see above single op).
       ;; Only the high word is decoded up front; the low word is merged in
       ;; further down, and only when SHIFT is positive.
 731   (defun %unary-truncate-double-float/fixnum (x)
 732     (declare (double-float x) (values fixnum))
 733     (locally (declare (optimize (speed 3) (safety 0)))
 734       (let* ((hi-bits (double-float-high-bits x))
 735              (exp (ldb sb!vm:double-float-exponent-byte hi-bits))
                  ;; High-word part of the significand plus the hidden bit.
 736              (frac (logior (ldb sb!vm:double-float-significand-byte hi-bits)
 737                            sb!vm:double-float-hidden-bit))
                  ;; As in the single-float case, except that the width
                  ;; subtracted is DOUBLE-FLOAT-DIGITS less one word.
 738              (shift (- exp (- sb!vm:double-float-digits sb!vm:n-word-bits)
 739                        sb!vm:double-float-bias)))
 740         (when (> exp sb!vm:double-float-normal-exponent-max)
 741           (error 'floating-point-invalid-operation :operator 'truncate
 742                  :operands (list x)))
             ;; Same cut-off as above, expressed for the high-word part only.
 743         (if (<= shift (- sb!vm:n-word-bits sb!vm:double-float-digits))
 744             0
 745             (let* ((res-hi (ash frac shift))
                        ;; With a positive SHIFT some low-word bits belong to
                        ;; the integer part: bring them down by
                        ;; (- N-WORD-BITS SHIFT) places and OR them in.
 746                    (res (if (plusp shift)
 747                             (logior res-hi
 748                                     (the fixnum
 749                                       (ash (double-float-low-bits x)
 750                                            (- shift sb!vm:n-word-bits))))
 751                             res-hi)))
 752               (declare (type (unsigned-byte 31) res-hi res))
 753               (if (minusp hi-bits)
 754                   (- res)
 755                   res)))))))
 756
 757 ;;; %UNARY-TRUNCATE handles TRUNCATE without any funky divisor, i.e. the
 758 ;;; conversion of a float or ratio to an integer. Only the integer result
 759 ;;; is returned: the second value of TRUNCATE is *not* produced here, so
 760 ;;; the caller must compute it if it is needed.
 761 ;;;
 762 ;;; Float arguments of small magnitude are picked off first so that the
 763 ;;; compiler can use special-case operations on them. The range test is
 764 ;;; exclusive because (due to round-off error) (float most-positive-fixnum)
 765 ;;; is likely to be equal to (1+ most-positive-fixnum). Exclusive bounds
 766 ;;; are good enough: most-positive-fixnum is one less than a power of two,
 767 ;;; and that power of two is exactly representable as a float (at least
 768 ;;; until we get 128-bit fixnums).
 769 (defun %unary-truncate (number)
 770   (number-dispatch ((number real))
 771     ((integer) number)
 772     ((ratio) (values (truncate (numerator number) (denominator number))))
 773     (((foreach single-float double-float #!+long-float long-float))
 774      (cond ((< (float most-negative-fixnum number)
 775                number
 776                (float most-positive-fixnum number))
 777             (truly-the fixnum (%unary-truncate number)))
 778            (t
 779             (multiple-value-bind (significand exponent)
 780                 (integer-decode-float number)
 781               (let ((magnitude (ash significand exponent)))
 782                 (if (minusp number) (- magnitude) magnitude))))))))
 783
 784 ;;; Specialized versions of %UNARY-TRUNCATE for a float of known format.
 785 ;;; (DEF type name) defines NAME, which returns the integer part of a float
 786 ;;; of format TYPE. The fixnum bounds are coerced to TYPE when the macro is
 787 ;;; expanded, so every variant must be instantiated with its own format.
 788 (macrolet ((def (type name)
 789              `(defun ,name (number)
 790                 (if (< ,(coerce sb!xc:most-negative-fixnum type)
 791                        number
 792                        ,(coerce sb!xc:most-positive-fixnum type))
 793                     (truly-the fixnum (,name number))
 794                     ;; General -- slow -- case.
 795                     (multiple-value-bind (bits exp) (integer-decode-float number)
 796                       (let ((res (ash bits exp)))
 797                         (if (minusp number) (- res) res)))))))
 798   (def single-float %unary-truncate/single-float)
 799   (def double-float %unary-truncate/double-float)
 800   #!+long-float (def long-float %unary-truncate/long-float))
 801
 802 ;;; %UNARY-ROUND is the round-to-nearest counterpart of %UNARY-TRUNCATE.
 803 ;;; When the round primitive can't be used, round-to-nearest is done by
 804 ;;; hand on the result of INTEGER-DECODE-FLOAT. [In practice only double
 805 ;;; floats ever need this rounding: a whole single-float fraction fits in
 806 ;;; a fixnum, so every single-float beyond most-positive-fixnum is already
 807 ;;; precisely an integer.]
 808 (defun %unary-round (number)
 809   (number-dispatch ((number real))
 810     ((integer) number)
 811     ((ratio) (values (round (numerator number) (denominator number))))
 812     (((foreach single-float double-float #!+long-float long-float))
 813      (if (< (float most-negative-fixnum number)
 814             number
 815             (float most-positive-fixnum number))
 816          (truly-the fixnum (%unary-round number))
 817          (multiple-value-bind (mantissa expo) (integer-decode-float number)
 818            (let* ((whole (ash mantissa expo))
 819                   (magnitude
 820                     (if (not (minusp expo))
 821                         whole
 822                         (let ((half (ash 1 (- -1 expo)))
 823                               (fraction
 824                                 (logand mantissa (lognot (ash -1 (- expo))))))
 825                           (cond ((< fraction half) whole)
 826                                 ((> fraction half) (1+ whole))
 827                                 ((oddp whole) (1+ whole)) ; tie: go to even
 828                                 (t whole))))))
 829              (if (minusp number) (- magnitude) magnitude))))))))
 830
 831 (defun %unary-ftruncate (number)
       ;; Float-valued truncation of a single real argument (apparently the
       ;; no-divisor case of FTRUNCATE, by analogy with %UNARY-TRUNCATE).
       ;; Integers go straight through FLOAT; a ratio is truncated first and
       ;; FLOAT receives only the primary value of TRUNCATE. Float arguments
       ;; are passed to %UNARY-FTRUNCATE again inside the per-format dispatch
       ;; clause -- presumably the compiler open-codes that call once the
       ;; float format is known; confirm before changing it.
 832   (number-dispatch ((number real))
 833     ((integer) (float number))
 834     ((ratio) (float (truncate (numerator number) (denominator number))))
 835     (((foreach single-float double-float #!+long-float long-float))
 836      (%unary-ftruncate number))))
 837
 838 (defun rational (x)
 839   #!+sb-doc
 840   "RATIONAL produces a rational number for any real numeric argument. This is
 841   more efficient than RATIONALIZE, but it assumes that floating-point is
 842   completely accurate, giving a result that isn't as pretty."
 843   (declare (explicit-check))
 844   (number-dispatch ((x real))
 845     (((foreach single-float double-float #!+long-float long-float))
 846      (multiple-value-bind (significand exponent) (integer-decode-float x)
 847        (cond ((eql significand 0) 0)
 848              (t                     ; X = SIGNED * 2^(SCALE - PRECISION)
 849               (let* ((precision (float-digits x))
 850                      (signed (if (minusp x) (- significand) significand))
 851                      (scale (+ exponent precision)))
 852                 (if (not (minusp scale))
 853                     (integer-/-integer (ash signed scale) (ash 1 precision))
 854                     (integer-/-integer signed (ash 1 (- precision scale)))))))))
 855     ((rational) x)))
 856
 857 ;;; This algorithm for RATIONALIZE, due to Bruno Haible, is included
 858 ;;; with permission.
 859 ;;;
 860 ;;; Algorithm (recursively presented):
 861 ;;;   If x is a rational number, return x.
 862 ;;;   If x = 0.0, return 0.
 863 ;;;   If x < 0.0, return (- (rationalize (- x))).
 864 ;;;   If x > 0.0:
 865 ;;;     Call (integer-decode-float x). It returns a m,e,s=1 (mantissa,
 866 ;;;     exponent, sign).
 867 ;;;     If m = 0 or e >= 0: return x = m*2^e.
 868 ;;;     Search a rational number between a = (m-1/2)*2^e and b = (m+1/2)*2^e
 869 ;;;     with smallest possible numerator and denominator.
 870 ;;;     Note 1: If m is a power of 2, we ought to take a = (m-1/4)*2^e.
 871 ;;;       But in this case the result will be x itself anyway, regardless of
 872 ;;;       the choice of a. Therefore we can simply ignore this case.
 873 ;;;     Note 2: At first, we need to consider the closed interval [a,b],
 874 ;;;       but since a and b have the denominator 2^(|e|+1) whereas x itself
 875 ;;;       has a denominator <= 2^|e|, we can restrict the search to the open
 876 ;;;       interval (a,b).
 877 ;;;     So, for given a and b (0 < a < b) we are searching a rational number
 878 ;;;     y with a <= y <= b.
 879 ;;;     Recursive algorithm fraction_between(a,b):
 880 ;;;       c := (ceiling a)
 881 ;;;       if c < b
 882 ;;;         then return c       ; because a <= c < b, c integer
 883 ;;;         else
 884 ;;;           ; a is not integer (otherwise we would have had c = a < b)
 885 ;;;           k := c-1          ; k = floor(a), k < a < b <= k+1
 886 ;;;           return y = k + 1/fraction_between(1/(b-k), 1/(a-k))
 887 ;;;                             ; note 1 <= 1/(b-k) < 1/(a-k)
 888 ;;;
 889 ;;; You can see that we are actually computing a continued fraction expansion.
 890 ;;;
 891 ;;; Algorithm (iterative):
 892 ;;;   If x is rational, return x.
 893 ;;;   Call (integer-decode-float x). It returns a m,e,s (mantissa,
 894 ;;;     exponent, sign).
 895 ;;;   If m = 0 or e >= 0, return m*2^e*s. (This includes the case x = 0.0.)
 896 ;;;   Create rational numbers a := (2*m-1)*2^(e-1) and b := (2*m+1)*2^(e-1)
 897 ;;;   (positive and already in lowest terms because the denominator is a
 898 ;;;   power of two and the numerator is odd).
 899 ;;;   Start a continued fraction expansion
 900 ;;;     p[-1] := 0, p[0] := 1, q[-1] := 1, q[0] := 0, i := 0.
 901 ;;;   Loop
 902 ;;;     c := (ceiling a)
 903 ;;;     if c >= b
 904 ;;;       then k := c-1, partial_quotient(k), (a,b) := (1/(b-k),1/(a-k)),
 905 ;;;            goto Loop
 906 ;;;   finally partial_quotient(c).
 907 ;;;   Here partial_quotient(c) denotes the iteration
 908 ;;;     i := i+1, p[i] := c*p[i-1]+p[i-2], q[i] := c*q[i-1]+q[i-2].
 909 ;;;   At the end, return s * (p[i]/q[i]).
 910 ;;;   This rational number is already in lowest terms because
 911 ;;;   p[i]*q[i-1]-p[i-1]*q[i] = (-1)^i.
 912 ;;;
 913 ;;; See also
 914 ;;;   Hardy, Wright: An Introduction to the Theory of Numbers
 915 ;;; and/or
 916 ;;;   <http://modular.fas.harvard.edu/edu/Fall2001/124/lectures/lecture17/lecture17/>
 917 ;;;   <http://modular.fas.harvard.edu/edu/Fall2001/124/lectures/lecture17/lecture18/>
 918
 919 (defun rationalize (x)
 920   #!+sb-doc
 921   "Converts any REAL to a RATIONAL.  Floats are converted to a simple rational
 922   representation exploiting the assumption that floats are only accurate to
 923   their precision.  RATIONALIZE (and also RATIONAL) preserve the invariant:
 924       (= x (float (rationalize x) x))"
 925   (declare (explicit-check))
 926   (number-dispatch ((x real))
 927     (((foreach single-float double-float #!+long-float long-float))
 928      ;; A fairly straightforward rendering of the iterative algorithm
 929      ;; above. LO and HI play the roles of a and b; P-PREV/Q-PREV and P/Q
 930      ;; are the last two numerator/denominator pairs (p[i-1], q[i-1] and
 931      ;; p[i], q[i]) of the continued fraction expansion.
 932      (multiple-value-bind (mantissa expo sign) (integer-decode-float x)
 933        (flet ((signed (n) (if (minusp sign) (- n) n)))
 934          (if (or (zerop mantissa) (>= expo 0))
 935              ;; m = 0 or e >= 0: the answer is simply s*m*2^e.
 936              (signed (ash mantissa expo))
 937              ;; EXPO < 0 and (2*m-1) and (2*m+1) are coprime to 2^(1-e),
 938              ;; so build the bounds up immediately, without having to do
 939              ;; a gcd.
 940              (let* ((den (ash 1 (- 1 expo)))
 941                     (lo (build-ratio (- (* 2 mantissa) 1) den))
 942                     (hi (build-ratio (+ (* 2 mantissa) 1) den))
 943                     (p-prev 0)
 944                     (q-prev 1)
 945                     (p 1)
 946                     (q 0))
 947                (loop
 948                  (let ((c (ceiling lo)))
 949                    ;; Done as soon as the integer C >= LO is below HI:
 950                    ;; C is the final partial quotient.
 951                    (when (< c hi)
 952                      (return (build-ratio (signed (+ (* c p) p-prev))
 953                                           (+ (* c q) q-prev))))
 954                    ;; Otherwise take K = C-1 as the next partial quotient
 955                    ;; and continue with (1/(HI-K), 1/(LO-K)).
 956                    (let ((k (- c 1)))
 957                      (psetf lo (/ (- hi k))
 958                             hi (/ (- lo k)))
 959                      (psetf p-prev p
 960                             q-prev q
 961                             p (+ (* k p) p-prev)
 962                             q (+ (* k q) q-prev))))))))))
 963     ((rational) x)))