1 ;;;; This file contains the definitions of float-specific number
2 ;;;; support (other than irrational stuff, which is in irrat.) There is
3 ;;;; code in here that assumes there are only two float formats: IEEE
4 ;;;; single and double. (LONG-FLOAT support has been added, but bugs
5 ;;;; may still remain due to old code which assumes this dichotomy.)
7 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; This software is derived from the CMU CL system, which was
11 ;;;; written at Carnegie Mellon University and released into the
12 ;;;; public domain. The software is in the public domain and is
13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
14 ;;;; files for more information.
16 (in-package "SB!KERNEL")
18 ;;;; float predicates and environment query
;;; Declaim the four float classification predicates MAYBE-INLINE.
21 (declaim (maybe-inline float-denormalized-p float-infinity-p float-nan-p
22 float-trapping-nan-p
))
;;; FLOAT-DENORMALIZED-P dispatches on the float format of X and, for each
;;; format, tests whether the exponent field of X's bit representation is
;;; zero.  The long-float clause is read only on x86 builds with LONG-FLOAT.
;;; NOTE(review): the embedded line numbering skips (27 -> 29 -> 32 ...), so
;;; the dispatch clause heads and the second operand of each AND are not
;;; visible in this copy -- confirm against the upstream file.
24 (defun float-denormalized-p (x)
26 "Return true if the float X is denormalized."
27 (number-dispatch ((x float
))
;; single-float: exponent field of the 32-bit representation
29 (and (zerop (ldb sb
!vm
:single-float-exponent-byte
(single-float-bits x
)))
;; double-float: the exponent field is read from the high word
32 (and (zerop (ldb sb
!vm
:double-float-exponent-byte
33 (double-float-high-bits x
)))
35 #!+(and long-float x86
)
;; long-float: the exponent field is read from the separate exponent word
37 (and (zerop (ldb sb
!vm
:long-float-exponent-byte
(long-float-exp-bits x
)))
;;; Macro used to define the float predicates that follow it in this file.
;;; Parameters: NAME of the function, its DOC string (declared IGNORABLE),
;;; and one body form per float format: SINGLE, DOUBLE and -- only on x86
;;; builds with LONG-FLOAT -- LONG.
;;; In the visible part of the expansion each format binds pieces of X's
;;; representation (BITS for single; HI and LO for double; EXP, HI and LO for
;;; long) and ANDs a test that the exponent field is greater than that
;;; format's NORMAL-EXPONENT-MAX with (presumably) the caller-supplied form.
;;; NOTE(review): the lines that splice in SINGLE/DOUBLE/LONG and the DEFUN
;;; template itself are not visible in this copy -- confirm.
40 (defmacro !define-float-dispatching-function
41 (name doc single double
#!+(and long-float x86
) long
)
42 (declare (ignorable doc
))
45 (number-dispatch ((x float
))
;; single-float: all fields live in one word, bound to BITS
47 (let ((bits (single-float-bits x
)))
48 (and (> (ldb sb
!vm
:single-float-exponent-byte bits
)
49 sb
!vm
:single-float-normal-exponent-max
)
;; double-float: HI carries the exponent; LO may go unused by a given
;; predicate, hence the IGNORABLE declaration
52 (let ((hi (double-float-high-bits x
))
53 (lo (double-float-low-bits x
)))
54 (declare (ignorable lo
))
55 (and (> (ldb sb
!vm
:double-float-exponent-byte hi
)
56 sb
!vm
:double-float-normal-exponent-max
)
58 #!+(and long-float x86
)
;; long-float: exponent word plus two significand words
60 (let ((exp (long-float-exp-bits x
))
61 (hi (long-float-high-bits x
))
62 (lo (long-float-low-bits x
)))
63 (declare (ignorable lo
))
64 (and (> (ldb sb
!vm
:long-float-exponent-byte exp
)
65 sb
!vm
:long-float-normal-exponent-max
)
;;; FLOAT-INFINITY-P is defined through !DEFINE-FLOAT-DISPATCHING-FUNCTION.
;;; Each per-format form below tests that the significand field is zero;
;;; BITS and HI are the variables bound by the defining macro.
;;; NOTE(review): for double and long floats only the high-word test is
;;; visible; the remaining operand(s) of each AND are missing from this copy.
68 (!define-float-dispatching-function float-infinity-p
69 "Return true if the float X is an infinity (+ or -)."
70 (zerop (ldb sb
!vm
:single-float-significand-byte bits
))
71 (and (zerop (ldb sb
!vm
:double-float-significand-byte hi
))
73 #!+(and long-float x86
)
74 (and (zerop (ldb sb
!vm
:long-float-significand-byte hi
))
;;; FLOAT-NAN-P is defined through !DEFINE-FLOAT-DISPATCHING-FUNCTION.
;;; The per-format forms are the negation of the FLOAT-INFINITY-P ones: they
;;; test for a non-zero significand field (BITS / HI are bound by the macro).
;;; NOTE(review): for double and long floats only the high-word test is
;;; visible; the remaining operand(s) of each OR are missing from this copy.
77 (!define-float-dispatching-function float-nan-p
78 "Return true if the float X is a NaN (Not a Number)."
79 (not (zerop (ldb sb
!vm
:single-float-significand-byte bits
)))
80 (or (not (zerop (ldb sb
!vm
:double-float-significand-byte hi
)))
82 #!+(and long-float x86
)
83 (or (not (zerop (ldb sb
!vm
:long-float-significand-byte hi
)))
;;; FLOAT-TRAPPING-NAN-P is defined through
;;; !DEFINE-FLOAT-DISPATCHING-FUNCTION.  Every per-format form masks the
;;; significand field with that format's TRAPPING-NAN-BIT.
;;; For single and double floats two forms are present, one testing that the
;;; masked value is zero (bit CLEAR) and one that it is non-zero (bit SET).
;;; NOTE(review): presumably read-time conditionals, not visible in this
;;; copy, select one form per architecture as the comment below describes --
;;; confirm.  Only the ZEROP form is visible for long floats.
86 (!define-float-dispatching-function float-trapping-nan-p
87 "Return true if the float X is a trapping NaN (Not a Number)."
88 ;; HPPA (and apparently MIPS) have trapping NaNs (SNaNs) with the
89 ;; trapping-nan-bit SET. PPC, SPARC, Alpha, and x86 (and presumably
90 ;; x86-64, ARM, and ARM64) have trapping NaNs (SNaNs) with the
91 ;; trapping-nan-bit CLEAR. Note that the given implementation
92 ;; considers infinities to be FLOAT-TRAPPING-NAN-P on most
;; [The sentence above is cut off in this copy.  With the bit-CLEAR test an
;; infinity, whose significand field is zero, also satisfies the form.]
95 (zerop (logand (ldb sb
!vm
:single-float-significand-byte bits
)
96 sb
!vm
:single-float-trapping-nan-bit
))
98 (not (zerop (logand (ldb sb
!vm
:single-float-significand-byte bits
)
99 sb
!vm
:single-float-trapping-nan-bit
)))
101 (zerop (logand (ldb sb
!vm
:double-float-significand-byte hi
)
102 sb
!vm
:double-float-trapping-nan-bit
))
104 (not (zerop (logand (ldb sb
!vm
:double-float-significand-byte hi
)
105 sb
!vm
:double-float-trapping-nan-bit
)))
106 #!+(and long-float x86
)
107 (zerop (logand (ldb sb
!vm
:long-float-significand-byte hi
)
108 sb
!vm
:long-float-trapping-nan-bit
)))
110 ;;; If denormalized, use a subfunction from INTEGER-DECODE-FLOAT to find the
111 ;;; actual exponent (and hence how denormalized it is), otherwise we just
112 ;;; return the number of digits or 0.
;;; The local macro FROB takes the format's DIGITS and BIAS constants and the
;;; name of the denormal decoding function (DECODE).  In the denormalized
;;; branch it calls DECODE on F, discards the first value, and returns
;;; (+ DIGITS (1- DIGITS) BIAS EXP), where EXP is the second value.
;;; NOTE(review): the zero and normal branches of FROB and the dispatch
;;; clause heads are not visible in this copy.
113 #!-sb-fluid
(declaim (maybe-inline float-precision
))
114 (defun float-precision (f)
116 "Return a non-negative number of significant digits in its float argument.
117 Will be less than FLOAT-DIGITS if denormalized or zero."
118 (declare (explicit-check))
119 (macrolet ((frob (digits bias decode
)
121 ((float-denormalized-p f
)
122 (multiple-value-bind (ignore exp
) (,decode f
)
123 (declare (ignore ignore
))
125 (+ ,digits
(1- ,digits
) ,bias exp
))))
128 (number-dispatch ((f float
))
;; one FROB instantiation per float format
130 (frob sb
!vm
:single-float-digits sb
!vm
:single-float-bias
131 integer-decode-single-denorm
))
133 (frob sb
!vm
:double-float-digits sb
!vm
:double-float-bias
134 integer-decode-double-denorm
))
137 (frob sb
!vm
:long-float-digits sb
!vm
:long-float-bias
138 integer-decode-long-denorm
)))))
;;; FLOAT-SIGN: FLOAT2 defaults to (FLOAT 1 FLOAT1), i.e. one in the format
;;; of FLOAT1.  The visible code decides the sign of FLOAT1 by applying
;;; MINUSP to the integer holding its sign bit (the whole word for single
;;; floats, the high word for doubles, the exponent word for long floats)
;;; rather than by comparing the float with zero, and the result of that
;;; test feeds a multiplication.
;;; NOTE(review): the end of the docstring and the remaining operands of the
;;; multiplication are missing from this copy -- confirm against upstream.
140 (defun float-sign (float1 &optional
(float2 (float 1 float1
)))
142 "Return a floating-point number that has the same sign as
143 FLOAT1 and, if FLOAT2 is given, has the same absolute value
145 (declare (float float1 float2
) (explicit-check))
146 (* (if (etypecase float1
147 (single-float (minusp (single-float-bits float1
)))
148 (double-float (minusp (double-float-high-bits float1
)))
150 (long-float (minusp (long-float-exp-bits float1
))))
;;; FLOAT-FORMAT-DIGITS maps a float format name to that format's digit
;;; count constant:
;;;   SHORT-FLOAT, SINGLE-FLOAT -> SINGLE-FLOAT-DIGITS
;;;   DOUBLE-FLOAT              -> DOUBLE-FLOAT-DIGITS (LONG-FLOAT is added to
;;;                                this clause when the LONG-FLOAT feature is
;;;                                absent)
;;;   LONG-FLOAT                -> LONG-FLOAT-DIGITS
;;; NOTE(review): the head of the case form (embedded line 156) is not
;;; visible in this copy.
155 (defun float-format-digits (format)
157 ((short-float single-float
) sb
!vm
:single-float-digits
)
158 ((double-float #!-long-float long-float
) sb
!vm
:double-float-digits
)
160 (long-float sb
!vm
:long-float-digits
)))
;;; FLOAT-DIGITS and FLOAT-RADIX are declaimed INLINE unless the SB-FLUID
;;; feature is present.
162 #!-sb-fluid
(declaim (inline float-digits float-radix
))
;;; FLOAT-DIGITS returns the digit-count constant of the float format of F;
;;; the result depends only on the type of F, not on its value.
164 (defun float-digits (f)
165 (declare (explicit-check))
166 (number-dispatch ((f float
))
167 ((single-float) sb
!vm
:single-float-digits
)
168 ((double-float) sb
!vm
:double-float-digits
)
170 ((long-float) sb
!vm
:long-float-digits
)))
;;; FLOAT-RADIX: X is declared to be a FLOAT and is otherwise ignored, so the
;;; result cannot depend on the argument's value.
;;; NOTE(review): the body form that supplies the radix is not visible in
;;; this copy.
172 (defun float-radix (x)
174 "Return (as an integer) the radix b of its floating-point argument."
175 (declare (ignore x
) (type float x
))
178 ;;;; INTEGER-DECODE-FLOAT and DECODE-FLOAT
181 (declaim (maybe-inline integer-decode-single-float
182 integer-decode-double-float
))
184 ;;; Handle the denormalized case of INTEGER-DECODE-FLOAT for SINGLE-FLOAT.
;;; Visible steps: take the bits of (ABS X); start SIG as the significand
;;; field shifted left by one; while the hidden-bit position of SIG is still
;;; clear, shift SIG left again; the exponent value is built from
;;; (- (- bias) digits ...) and the sign value is -1 or 1 according to
;;; FLOAT-SIGN.  SIG is declared (UNSIGNED-BYTE 24) and EXTRA-BIAS
;;; (INTEGER 0 23).
;;; NOTE(review): the binding and updating of EXTRA-BIAS and the loop
;;; construct are not visible in this copy.
185 (defun integer-decode-single-denorm (x)
186 (declare (type single-float x
))
187 (let* ((bits (single-float-bits (abs x
)))
188 (sig (ash (ldb sb
!vm
:single-float-significand-byte bits
) 1))
190 (declare (type (unsigned-byte 24) sig
)
191 (type (integer 0 23) extra-bias
))
;; stop normalizing once the hidden-bit position is occupied
193 (unless (zerop (logand sig sb
!vm
:single-float-hidden-bit
))
195 (setq sig
(ash sig
1))
198 (- (- sb
!vm
:single-float-bias
)
199 sb
!vm
:single-float-digits
201 (if (minusp (float-sign x
)) -
1 1))))
203 ;;; Handle the single-float case of INTEGER-DECODE-FLOAT. If an infinity or
204 ;;; NaN, error. If a denorm, call i-d-s-DENORM to handle it.
;;; Returns three values: integer significand, exponent, and sign (-1 or 1).
;;; BIASED is the exponent field minus the format bias minus the digit
;;; count, i.e. the exponent that goes with an integer significand.
205 (defun integer-decode-single-float (x)
206 (declare (single-float x
))
207 (let* ((bits (single-float-bits (abs x
)))
208 (exp (ldb sb
!vm
:single-float-exponent-byte bits
))
209 (sig (ldb sb
!vm
:single-float-significand-byte bits
))
210 (sign (if (minusp (float-sign x
)) -
1 1))
211 (biased (- exp sb
!vm
:single-float-bias sb
!vm
:single-float-digits
)))
212 (declare (fixnum biased
))
;; an exponent field above the normal maximum means NaN or infinity
213 (unless (<= exp sb
!vm
:single-float-normal-exponent-max
)
214 (error "can't decode NaN or infinity: ~S" x
))
;; zero: exponent and significand fields are both zero
215 (cond ((and (zerop exp
) (zerop sig
))
216 (values 0 biased sign
))
;; denormalized: delegate
217 ((< exp sb
!vm
:single-float-normal-exponent-min
)
218 (integer-decode-single-denorm x
))
;; normal: restore the hidden bit
220 (values (logior sig sb
!vm
:single-float-hidden-bit
) biased sign
)))))
222 ;;; like INTEGER-DECODE-SINGLE-DENORM, only doubly so
;;; Works on the two 32-bit halves of the representation: SIG-HIGH is the
;;; significand field of the high word of (ABS X); LOW-BITS is the low word.
;;; Two code paths are visible.  The first normalizes a 32-bit SIG against a
;;; moving BIT and returns it shifted left by (digits - 32); the second
;;; starts from SIG-HIGH shifted left by one, normalizes against the hidden
;;; bit, and merges in LOW-BITS.
;;; NOTE(review): the test choosing between the paths (presumably whether
;;; SIG-HIGH is zero), the loop heads, and the trailing SIGN value are not
;;; visible in this copy -- confirm.
223 (defun integer-decode-double-denorm (x)
224 (declare (type double-float x
))
225 (let* ((high-bits (double-float-high-bits (abs x
)))
226 (sig-high (ldb sb
!vm
:double-float-significand-byte high-bits
))
227 (low-bits (double-float-low-bits x
))
228 (sign (if (minusp (float-sign x
)) -
1 1))
229 (biased (- (- sb
!vm
:double-float-bias
) sb
!vm
:double-float-digits
)))
232 (extra-bias (- sb
!vm
:double-float-digits
33))
234 (declare (type (unsigned-byte 32) sig
) (fixnum extra-bias
))
236 (unless (zerop (logand sig bit
)) (return))
237 (setq sig
(ash sig
1))
239 (values (ash sig
(- sb
!vm
:double-float-digits
32))
240 (truly-the fixnum
(- biased extra-bias
))
242 (let ((sig (ash sig-high
1))
244 (declare (type (unsigned-byte 32) sig
) (fixnum extra-bias
))
246 (unless (zerop (logand sig sb
!vm
:double-float-hidden-bit
))
248 (setq sig
(ash sig
1))
250 (values (logior (ash sig
32) (ash low-bits
(1- extra-bias
)))
251 (truly-the fixnum
(- biased extra-bias
))
254 ;;; like INTEGER-DECODE-SINGLE-FLOAT, only doubly so
;;; HI and LO are the two words of a value named ABS (presumably (ABS X);
;;; its binding is not visible in this copy).  Zero requires the exponent
;;; field, the high significand field and the low word all to be zero.
;;; NOTE(review): the error text here (capitalized, with a trailing period)
;;; differs from the one used by the single- and long-float decoders in this
;;; file; unify if the difference is not intentional.
255 (defun integer-decode-double-float (x)
256 (declare (double-float x
))
258 (hi (double-float-high-bits abs
))
259 (lo (double-float-low-bits abs
))
260 (exp (ldb sb
!vm
:double-float-exponent-byte hi
))
261 (sig (ldb sb
!vm
:double-float-significand-byte hi
))
262 (sign (if (minusp (float-sign x
)) -
1 1))
263 (biased (- exp sb
!vm
:double-float-bias sb
!vm
:double-float-digits
)))
264 (declare (fixnum biased
))
;; an exponent field above the normal maximum means NaN or infinity
265 (unless (<= exp sb
!vm
:double-float-normal-exponent-max
)
266 (error "Can't decode NaN or infinity: ~S." x
))
267 (cond ((and (zerop exp
) (zerop sig
) (zerop lo
))
268 (values 0 biased sign
))
;; denormalized: delegate
269 ((< exp sb
!vm
:double-float-normal-exponent-min
)
270 (integer-decode-double-denorm x
))
;; normal: high significand bits with the hidden bit restored
273 (logior (ash (logior (ldb sb
!vm
:double-float-significand-byte hi
)
274 sb
!vm
:double-float-hidden-bit
)
;;; Long-float counterpart of INTEGER-DECODE-DOUBLE-DENORM; read only on x86
;;; builds with LONG-FLOAT.  The visible structure mirrors the double-float
;;; version with the LONG-FLOAT constants and accessors substituted.
;;; NOTE(review): as in the double version, the test selecting between the
;;; two normalization paths and the loop heads are not visible in this copy.
279 #!+(and long-float x86
)
280 (defun integer-decode-long-denorm (x)
281 (declare (type long-float x
))
282 (let* ((high-bits (long-float-high-bits (abs x
)))
283 (sig-high (ldb sb
!vm
:long-float-significand-byte high-bits
))
284 (low-bits (long-float-low-bits x
))
285 (sign (if (minusp (float-sign x
)) -
1 1))
286 (biased (- (- sb
!vm
:long-float-bias
) sb
!vm
:long-float-digits
)))
289 (extra-bias (- sb
!vm
:long-float-digits
33))
291 (declare (type (unsigned-byte 32) sig
) (fixnum extra-bias
))
293 (unless (zerop (logand sig bit
)) (return))
294 (setq sig
(ash sig
1))
296 (values (ash sig
(- sb
!vm
:long-float-digits
32))
297 (truly-the fixnum
(- biased extra-bias
))
299 (let ((sig (ash sig-high
1))
301 (declare (type (unsigned-byte 32) sig
) (fixnum extra-bias
))
303 (unless (zerop (logand sig sb
!vm
:long-float-hidden-bit
))
305 (setq sig
(ash sig
1))
307 (values (logior (ash sig
32) (ash low-bits
(1- extra-bias
)))
308 (truly-the fixnum
(- biased extra-bias
))
;;; Long-float case of INTEGER-DECODE-FLOAT; read only on x86 builds with
;;; LONG-FLOAT.  Unlike the single and double versions, the sign is taken
;;; from the sign of the exponent word (EXP-BITS) and the significand is
;;; assembled from the HI and LO words without OR-ing in a hidden bit.
311 #!+(and long-float x86
)
312 (defun integer-decode-long-float (x)
313 (declare (long-float x
))
314 (let* ((hi (long-float-high-bits x
))
315 (lo (long-float-low-bits x
))
316 (exp-bits (long-float-exp-bits x
))
317 (exp (ldb sb
!vm
:long-float-exponent-byte exp-bits
))
318 (sign (if (minusp exp-bits
) -
1 1))
319 (biased (- exp sb
!vm
:long-float-bias sb
!vm
:long-float-digits
)))
320 (declare (fixnum biased
))
;; an exponent field above the normal maximum means NaN or infinity
321 (unless (<= exp sb
!vm
:long-float-normal-exponent-max
)
322 (error "can't decode NaN or infinity: ~S" x
))
323 (cond ((and (zerop exp
) (zerop hi
) (zerop lo
))
324 (values 0 biased sign
))
325 ((< exp sb
!vm
:long-float-normal-exponent-min
)
326 (integer-decode-long-denorm x
))
;; normal: 64-bit significand = HI * 2^32 + LO
328 (values (logior (ash hi
32) lo
) biased sign
)))))
330 ;;; Dispatch to the correct type-specific i-d-f function.
;;; NOTE(review): the dispatch clause heads (and any feature conditional
;;; guarding the long-float clause) are not visible in this copy.
331 (defun integer-decode-float (x)
333 "Return three values:
334 1) an integer representation of the significand.
335 2) the exponent for the power of 2 that the significand must be multiplied
336 by to get the actual value. This differs from the DECODE-FLOAT exponent
337 by FLOAT-DIGITS, since the significand has been scaled to have all its
338 digits before the radix point.
339 3) -1 or 1 (i.e. the sign of the argument.)"
340 (declare (explicit-check))
341 (number-dispatch ((x float
))
343 (integer-decode-single-float x
))
345 (integer-decode-double-float x
))
348 (integer-decode-long-float x
))))
350 #!-sb-fluid
(declaim (maybe-inline decode-single-float decode-double-float
))
352 ;;; Handle the denormalized case of DECODE-SINGLE-FLOAT. We call
353 ;;; INTEGER-DECODE-SINGLE-DENORM and then make the result into a float.
;;; The float is built by depositing SIG into the significand field of a
;;; word whose exponent field holds the format bias; the exponent value
;;; returned is the integer-decode exponent plus the digit count.
;;; NOTE(review): the innermost DPB argument and the third (sign) value are
;;; not visible in this copy.
354 (defun decode-single-denorm (x)
355 (declare (type single-float x
))
356 (multiple-value-bind (sig exp sign
) (integer-decode-single-denorm x
)
357 (values (make-single-float
358 (dpb sig sb
!vm
:single-float-significand-byte
359 (dpb sb
!vm
:single-float-bias
360 sb
!vm
:single-float-exponent-byte
362 (truly-the fixnum
(+ exp sb
!vm
:single-float-digits
))
365 ;;; Handle the single-float case of DECODE-FLOAT. If an infinity or NaN,
366 ;;; error. If a denorm, call d-s-DENORM to handle it.
;;; Here SIGN is a float (the result of FLOAT-SIGN), not an integer, and
;;; BIASED is the exponent field minus the format bias.  In the normal case
;;; the result float is made by overwriting the exponent field with the bias.
;;; NOTE(review): the COND head, the zero test and the tail of the normal
;;; branch are not visible in this copy.
367 (defun decode-single-float (x)
368 (declare (single-float x
))
369 (let* ((bits (single-float-bits (abs x
)))
370 (exp (ldb sb
!vm
:single-float-exponent-byte bits
))
371 (sign (float-sign x
))
372 (biased (truly-the single-float-exponent
373 (- exp sb
!vm
:single-float-bias
))))
;; an exponent field above the normal maximum means NaN or infinity
374 (unless (<= exp sb
!vm
:single-float-normal-exponent-max
)
375 (error "can't decode NaN or infinity: ~S" x
))
377 (values 0.0f0 biased sign
))
378 ((< exp sb
!vm
:single-float-normal-exponent-min
)
379 (decode-single-denorm x
))
381 (values (make-single-float
382 (dpb sb
!vm
:single-float-bias
383 sb
!vm
:single-float-exponent-byte
387 ;;; like DECODE-SINGLE-DENORM, only doubly so
;;; The high word is built from the upper bits of SIG (shifted right by 32,
;;; hidden bit masked off) deposited into the significand field of a word
;;; whose exponent field holds the bias; the low word is the low 32 bits of
;;; SIG.  The exponent returned is the integer-decode exponent plus the
;;; digit count.
;;; NOTE(review): the third (sign) value is not visible in this copy.
388 (defun decode-double-denorm (x)
389 (declare (double-float x
))
390 (multiple-value-bind (sig exp sign
) (integer-decode-double-denorm x
)
391 (values (make-double-float
392 (dpb (logand (ash sig -
32) (lognot sb
!vm
:double-float-hidden-bit
))
393 sb
!vm
:double-float-significand-byte
394 (dpb sb
!vm
:double-float-bias
395 sb
!vm
:double-float-exponent-byte
0))
396 (ldb (byte 32 0) sig
))
397 (truly-the fixnum
(+ exp sb
!vm
:double-float-digits
))
400 ;;; like DECODE-SINGLE-FLOAT, only doubly so
;;; HI and LO are the two words of a value named ABS (presumably (ABS X);
;;; its binding is not visible in this copy).  SIGN is a float from
;;; FLOAT-SIGN.  In the normal case the exponent field of HI is overwritten
;;; with the bias.
;;; NOTE(review): the COND head, the zero test and the tail of the normal
;;; branch are not visible in this copy.
401 (defun decode-double-float (x)
402 (declare (double-float x
))
404 (hi (double-float-high-bits abs
))
405 (lo (double-float-low-bits abs
))
406 (exp (ldb sb
!vm
:double-float-exponent-byte hi
))
407 (sign (float-sign x
))
408 (biased (truly-the double-float-exponent
409 (- exp sb
!vm
:double-float-bias
))))
;; an exponent field above the normal maximum means NaN or infinity
410 (unless (<= exp sb
!vm
:double-float-normal-exponent-max
)
411 (error "can't decode NaN or infinity: ~S" x
))
413 (values 0.0d0 biased sign
))
414 ((< exp sb
!vm
:double-float-normal-exponent-min
)
415 (decode-double-denorm x
))
417 (values (make-double-float
418 (dpb sb
!vm
:double-float-bias
419 sb
!vm
:double-float-exponent-byte hi
)
;;; Long-float counterpart of DECODE-DOUBLE-DENORM; read only on x86 builds
;;; with LONG-FLOAT.  The float is made from the bias as exponent word, the
;;; upper bits of SIG (shifted right by 32) and the low 32 bits of SIG; the
;;; exponent returned is the integer-decode exponent plus the digit count.
;;; NOTE(review): the third (sign) value is not visible in this copy.
423 #!+(and long-float x86
)
424 (defun decode-long-denorm (x)
425 (declare (long-float x
))
426 (multiple-value-bind (sig exp sign
) (integer-decode-long-denorm x
)
427 (values (make-long-float sb
!vm
:long-float-bias
(ash sig -
32)
428 (ldb (byte 32 0) sig
))
429 (truly-the fixnum
(+ exp sb
!vm
:long-float-digits
))
;;; Long-float case of DECODE-FLOAT; read only on x86 builds with
;;; LONG-FLOAT.  The sign (-1l0 or 1l0) comes from the sign of the exponent
;;; word, and BIASED is the exponent field minus the format bias.
;;; NOTE(review): the COND head, the zero test and the tail of the normal
;;; branch are not visible in this copy.
432 #!+(and long-float x86
)
433 (defun decode-long-float (x)
434 (declare (long-float x
))
435 (let* ((hi (long-float-high-bits x
))
436 (lo (long-float-low-bits x
))
437 (exp-bits (long-float-exp-bits x
))
438 (exp (ldb sb
!vm
:long-float-exponent-byte exp-bits
))
439 (sign (if (minusp exp-bits
) -
1l0 1l0))
440 (biased (truly-the long-float-exponent
441 (- exp sb
!vm
:long-float-bias
))))
;; an exponent field above the normal maximum means NaN or infinity
442 (unless (<= exp sb
!vm
:long-float-normal-exponent-max
)
443 (error "can't decode NaN or infinity: ~S" x
))
445 (values 0.0l0 biased sign
))
446 ((< exp sb
!vm
:long-float-normal-exponent-min
)
447 (decode-long-denorm x
))
449 (values (make-long-float
450 (dpb sb
!vm
:long-float-bias sb
!vm
:long-float-exponent-byte
456 ;;; Dispatch to the appropriate type-specific function.
;;; NOTE(review): the dispatch clause heads (and any feature conditional
;;; guarding the long-float clause) are not visible in this copy.
457 (defun decode-float (f)
459 "Return three values:
460 1) a floating-point number representing the significand. This is always
461 between 0.5 (inclusive) and 1.0 (exclusive).
462 2) an integer representing the exponent.
463 3) -1.0 or 1.0 (i.e. the sign of the argument.)"
464 (declare (explicit-check))
465 (number-dispatch ((f float
))
467 (decode-single-float f
))
469 (decode-double-float f
))
472 (decode-long-float f
))))
476 #!-sb-fluid
(declaim (maybe-inline scale-single-float scale-double-float
))
478 ;;; Handle float scaling where the X is denormalized or the result is
479 ;;; denormalized or underflows to 0.
;;; Visible steps: decode X into an integer significand and exponent;
;;; NEW-EXP adds EXP, the decoded exponent, the digit count and the format
;;; bias; SIGN is 1 for negative X, else 0.  On the underflow path the
;;; :INEXACT trap is consulted before the :UNDERFLOW trap, each signalling
;;; its condition with X and EXP as operands; otherwise the result is built
;;; from bits, with exponent field 0 and SIG shifted by (1- NEW-EXP).
;;; Only single and double floats are handled in the visible clauses.
;;; NOTE(review): the comparison that selects the underflow path and the
;;; result for SHIFT < -(DIGITS - 1) are not visible in this copy.
480 (defun scale-float-maybe-underflow (x exp
)
481 (multiple-value-bind (sig old-exp
) (integer-decode-float x
)
482 (let* ((digits (float-digits x
))
483 (new-exp (+ exp old-exp digits
485 (single-float sb
!vm
:single-float-bias
)
486 (double-float sb
!vm
:double-float-bias
))))
487 (sign (if (minusp (float-sign x
)) 1 0)))
491 (single-float sb
!vm
:single-float-normal-exponent-min
)
492 (double-float sb
!vm
:double-float-normal-exponent-min
)))
493 (when (sb!vm
:current-float-trap
:inexact
)
494 (error 'floating-point-inexact
:operation
'scale-float
495 :operands
(list x exp
)))
496 (when (sb!vm
:current-float-trap
:underflow
)
497 (error 'floating-point-underflow
:operation
'scale-float
498 :operands
(list x exp
)))
499 (let ((shift (1- new-exp
)))
500 (if (< shift
(- (1- digits
)))
;; denormalized result: exponent field 0, significand shifted right
503 (single-float (single-from-bits sign
0 (ash sig shift
)))
504 (double-float (double-from-bits sign
0 (ash sig shift
)))))))
;; result still normal: assemble directly from NEW-EXP and SIG
507 (single-float (single-from-bits sign new-exp sig
))
508 (double-float (double-from-bits sign new-exp sig
))))))))
510 ;;; Called when scaling a float overflows, or the original float was a
511 ;;; NaN or infinity. If overflow errors are trapped, then error,
512 ;;; otherwise return the appropriate infinity. If a NaN, signal or not
;;; [The sentence above is cut off in this copy.]
;;; Visible behaviour: a trapping NaN signals
;;; FLOATING-POINT-INVALID-OPERATION when the :INVALID trap is enabled; on
;;; overflow the :OVERFLOW trap is consulted before the :INEXACT trap; the
;;; values built at the end are positive infinities (sign 0, exponent one
;;; above the normal maximum, significand 0).
;;; NOTE(review): the COND head, the results of the infinity and NaN
;;; branches and any sign handling around the infinities are not visible.
514 (defun scale-float-maybe-overflow (x exp
)
516 ((float-infinity-p x
)
517 ;; Infinity is infinity, no matter how small...
520 (when (and (float-trapping-nan-p x
)
521 (sb!vm
:current-float-trap
:invalid
))
522 (error 'floating-point-invalid-operation
:operation
'scale-float
523 :operands
(list x exp
)))
526 (when (sb!vm
:current-float-trap
:overflow
)
527 (error 'floating-point-overflow
:operation
'scale-float
528 :operands
(list x exp
)))
529 (when (sb!vm
:current-float-trap
:inexact
)
530 (error 'floating-point-inexact
:operation
'scale-float
531 :operands
(list x exp
)))
535 ;; SINGLE-FLOAT-POSITIVE-INFINITY
536 (single-from-bits 0 (1+ sb
!vm
:single-float-normal-exponent-max
) 0))
538 ;; DOUBLE-FLOAT-POSITIVE-INFINITY
539 (double-from-bits 0 (1+ sb
!vm
:double-float-normal-exponent-max
) 0)))))))
541 ;;; Scale a single or double float, calling the correct over/underflow
;;; [The sentence above is cut off in this copy.]
;;; SCALE-SINGLE-FLOAT: NEW-EXP is the exponent field of X plus EXP.  If the
;;; old or the new exponent field lies below the normal minimum the
;;; underflow helper is called; if either lies above the normal maximum the
;;; overflow helper is called; otherwise the result is X with its exponent
;;; field replaced by NEW-EXP.  The trailing clauses send an UNSIGNED-BYTE
;;; EXP to the overflow helper and a non-positive one to the underflow
;;; helper.
;;; NOTE(review): the outer ETYPECASE/COND heads are not visible; presumably
;;; the bit-twiddling path is taken only for fixnum EXP -- confirm.
543 (defun scale-single-float (x exp
)
544 (declare (single-float x
) (integer exp
))
547 (let* ((bits (single-float-bits x
))
548 (old-exp (ldb sb
!vm
:single-float-exponent-byte bits
))
549 (new-exp (+ old-exp exp
)))
552 ((or (< old-exp sb
!vm
:single-float-normal-exponent-min
)
553 (< new-exp sb
!vm
:single-float-normal-exponent-min
))
554 (scale-float-maybe-underflow x exp
))
555 ((or (> old-exp sb
!vm
:single-float-normal-exponent-max
)
556 (> new-exp sb
!vm
:single-float-normal-exponent-max
))
557 (scale-float-maybe-overflow x exp
))
559 (make-single-float (dpb new-exp
560 sb
!vm
:single-float-exponent-byte
562 (unsigned-byte (scale-float-maybe-overflow x exp
))
563 ((integer * 0) (scale-float-maybe-underflow x exp
))))
;;; SCALE-DOUBLE-FLOAT: same scheme as SCALE-SINGLE-FLOAT, applied to the
;;; exponent field in the high word HI; LO is the low word of X.
;;; Out-of-range old or new exponent fields go to the underflow/overflow
;;; helpers; otherwise the exponent field of HI is replaced by NEW-EXP.
;;; NOTE(review): the outer ETYPECASE/COND heads and the second argument of
;;; MAKE-DOUBLE-FLOAT are not visible in this copy.
564 (defun scale-double-float (x exp
)
565 (declare (double-float x
) (integer exp
))
568 (let* ((hi (double-float-high-bits x
))
569 (lo (double-float-low-bits x
))
570 (old-exp (ldb sb
!vm
:double-float-exponent-byte hi
))
571 (new-exp (+ old-exp exp
)))
574 ((or (< old-exp sb
!vm
:double-float-normal-exponent-min
)
575 (< new-exp sb
!vm
:double-float-normal-exponent-min
))
576 (scale-float-maybe-underflow x exp
))
577 ((or (> old-exp sb
!vm
:double-float-normal-exponent-max
)
578 (> new-exp sb
!vm
:double-float-normal-exponent-max
))
579 (scale-float-maybe-overflow x exp
))
581 (make-double-float (dpb new-exp sb
!vm
:double-float-exponent-byte hi
)
583 (unsigned-byte (scale-float-maybe-overflow x exp
))
584 ((integer * 0) (scale-float-maybe-underflow x exp
))))
;;; SCALE-LONG-FLOAT is read only on x86 builds with LONG-FLOAT.
;;; NOTE(review): only the lambda list and the type declaration are visible
;;; in this copy; the body is missing.
586 #!+(and x86 long-float
)
587 (defun scale-long-float (x exp
)
588 (declare (long-float x
) (integer exp
))
591 ;;; Dispatch to the correct type-specific scale-float function.
;;; NOTE(review): the dispatch clause heads (and any feature conditional
;;; guarding the long-float clause) are not visible in this copy.
592 (defun scale-float (f ex
)
594 "Return the value (* f (expt (float 2 f) ex)), but with no unnecessary loss
595 of precision or overflow."
596 (declare (explicit-check))
597 (number-dispatch ((f float
))
599 (scale-single-float f ex
))
601 (scale-double-float f ex
))
604 (scale-long-float f ex
))))
606 ;;;; converting to/from floats
;;; FLOAT: OTHERP records whether the optional OTHER argument was supplied.
;;; With OTHER, NUMBER is coerced to the float format of OTHER (the
;;; DISPATCH-TYPE of OTHER in the two-argument NUMBER-DISPATCH); the final
;;; visible form coerces NUMBER to SINGLE-FLOAT.
;;; NOTE(review): the IF on OTHERP and the clause that returns an
;;; already-float NUMBER unchanged are not visible in this copy.
608 (defun float (number &optional
(other () otherp
))
610 "Converts any REAL to a float. If OTHER is not provided, it returns a
611 SINGLE-FLOAT if NUMBER is not already a FLOAT. If OTHER is provided, the
612 result is the same float format as OTHER."
613 (declare (explicit-check))
615 (number-dispatch ((number real
) (other float
))
616 (((foreach rational single-float double-float
#!+long-float long-float
)
617 (foreach single-float double-float
#!+long-float long-float
))
618 (coerce number
'(dispatch-type other
))))
621 (coerce number
'single-float
))))
;;; Defines %SINGLE-FLOAT, %DOUBLE-FLOAT and %LONG-FLOAT from one template.
;;; FROB takes the function NAME and the target float TYPE; in the visible
;;; clauses a bignum argument goes through BIGNUM-TO-FLOAT and a ratio
;;; through FLOAT-RATIO, both with the quoted TYPE.
;;; NOTE(review): the DEFUN template head, the float and fixnum clauses, and
;;; any feature conditional on the %LONG-FLOAT line are not visible here.
623 (macrolet ((frob (name type
)
625 (number-dispatch ((x real
))
626 (((foreach single-float double-float
#!+long-float long-float
630 (bignum-to-float x
',type
))
632 (float-ratio x
',type
))))))
633 (frob %single-float single-float
)
634 (frob %double-float double-float
)
636 (frob %long-float long-float
))
638 ;;; Convert a ratio to a float. We avoid any rounding error by doing an
639 ;;; integer division. Accuracy is important to preserve print-read
640 ;;; consistency, since this is ultimately how the reader reads a float. We
641 ;;; scale the numerator by a power of two until the division results in the
642 ;;; desired number of fraction bits, then do round-to-nearest.
;;; Arguments: X, the ratio, and FORMAT, a float format name accepted by
;;; FLOAT-FORMAT-DIGITS.  The sign is split off first (NUM is the absolute
;;; numerator) and is reapplied as a sign bit when the float is assembled.
;;; NOTE(review): the binding of SCALE, the LOOP/COND heads of the rounding
;;; section and the head of FLOATIT are not visible in this copy.
643 (defun float-ratio (x format
)
644 (let* ((signed-num (numerator x
))
645 (plusp (plusp signed-num
))
646 (num (if plusp signed-num
(- signed-num
)))
647 (den (denominator x
))
648 (digits (float-format-digits format
))
650 (declare (fixnum digits scale
))
651 ;; Strip any trailing zeros from the denominator and move it into the scale
652 ;; factor (to minimize the size of the operands.)
;; (LOGXOR DEN (1- DEN)) sets the lowest one bit of DEN and every bit below
;; it, so its INTEGER-LENGTH minus one is the number of trailing zero bits.
653 (let ((den-twos (1- (integer-length (logxor den
(1- den
))))))
654 (declare (fixnum den-twos
))
655 (decf scale den-twos
)
656 (setq den
(ash den
(- den-twos
))))
657 ;; Guess how much we need to scale by from the magnitudes of the numerator
658 ;; and denominator. We want one extra bit for a guard bit.
659 (let* ((num-len (integer-length num
))
660 (den-len (integer-length den
))
661 (delta (- den-len num-len
))
662 (shift (1+ (the fixnum
(+ delta digits
))))
663 (shifted-num (ash num shift
)))
664 (declare (fixnum delta shift
))
;; FLOAT-AND-SCALE drops the guard bit, then scales the assembled float;
;; when the bits are one longer than DIGITS it drops one more bit and
;; bumps the scale by one.
666 (labels ((float-and-scale (bits)
667 (let* ((bits (ash bits -
1))
668 (len (integer-length bits
)))
669 (cond ((> len digits
)
670 (aver (= len
(the fixnum
(1+ digits
))))
671 (scale-float (floatit (ash bits -
1)) (1+ scale
)))
673 (scale-float (floatit bits
) scale
)))))
;; sign bit for the assembled float: 0 for positive, 1 for negative
675 (let ((sign (if plusp
0 1)))
678 (single-from-bits sign sb
!vm
:single-float-bias bits
))
680 (double-from-bits sign sb
!vm
:double-float-bias bits
))
683 (long-from-bits sign sb
!vm
:long-float-bias bits
))))))
685 (multiple-value-bind (fraction-and-guard rem
)
686 (truncate shifted-num den
)
687 (let ((extra (- (integer-length fraction-and-guard
) digits
)))
688 (declare (fixnum extra
))
;; an odd FRACTION-AND-GUARD means the guard bit is set
691 ((oddp fraction-and-guard
)
695 (if (zerop (logand fraction-and-guard
2))
697 (1+ fraction-and-guard
)))
698 (float-and-scale (1+ fraction-and-guard
)))))
700 (return (float-and-scale fraction-and-guard
)))))
701 (setq shifted-num
(ash shifted-num -
1))
704 ;;; These might be useful if we ever have a machine without float/integer
705 ;;; conversion hardware. For now, we'll use special ops that
706 ;;; uninterruptibly frob the rounding modes & do ieee round-to-integer.
709 ;; The compiler compiles a call to this when we are doing %UNARY-TRUNCATE
710 ;; and the result is known to be a fixnum. We can avoid some generic
711 ;; arithmetic in this case.
;; FRAC is the significand with the hidden bit OR-ed in; SHIFT is the
;; exponent field minus the digit count minus the bias, i.e. how far FRAC
;; must be shifted to give the integer part.  An exponent field above the
;; normal maximum (NaN or infinity) signals
;; FLOATING-POINT-INVALID-OPERATION.
;; NOTE(review): the condition is created with the initarg :OPERATOR, while
;; the SCALE-FLOAT code in this file uses :OPERATION -- check which initarg
;; the condition class actually accepts.
;; NOTE(review): the rest of the ERROR call, the small-magnitude result and
;; the sign handling are not visible in this copy.
712 (defun %unary-truncate-single-float
/fixnum
(x)
713 (declare (single-float x
) (values fixnum
))
714 (locally (declare (optimize (speed 3) (safety 0)))
715 (let* ((bits (single-float-bits x
))
716 (exp (ldb sb
!vm
:single-float-exponent-byte bits
))
717 (frac (logior (ldb sb
!vm
:single-float-significand-byte bits
)
718 sb
!vm
:single-float-hidden-bit
))
719 (shift (- exp sb
!vm
:single-float-digits sb
!vm
:single-float-bias
)))
720 (when (> exp sb
!vm
:single-float-normal-exponent-max
)
721 (error 'floating-point-invalid-operation
:operator
'truncate
723 (if (<= shift
(- sb
!vm
:single-float-digits
))
725 (let ((res (ash frac shift
)))
726 (declare (type (unsigned-byte 31) res
))
730 ;; Double-float version of this operation (see above single op).
;; FRAC is taken from the high word only (significand bits plus hidden
;; bit); bits from the low word are shifted in separately to form RES.
;; NOTE(review): as in the single-float version, the condition is created
;; with the initarg :OPERATOR, while the SCALE-FLOAT code in this file uses
;; :OPERATION -- check which initarg the condition class accepts.
;; NOTE(review): the rest of the ERROR call, the small-magnitude result,
;; the combination of RES-HI with the low bits and the sign handling are
;; not visible in this copy.
731 (defun %unary-truncate-double-float
/fixnum
(x)
732 (declare (double-float x
) (values fixnum
))
733 (locally (declare (optimize (speed 3) (safety 0)))
734 (let* ((hi-bits (double-float-high-bits x
))
735 (exp (ldb sb
!vm
:double-float-exponent-byte hi-bits
))
736 (frac (logior (ldb sb
!vm
:double-float-significand-byte hi-bits
)
737 sb
!vm
:double-float-hidden-bit
))
738 (shift (- exp
(- sb
!vm
:double-float-digits sb
!vm
:n-word-bits
)
739 sb
!vm
:double-float-bias
)))
740 (when (> exp sb
!vm
:double-float-normal-exponent-max
)
741 (error 'floating-point-invalid-operation
:operator
'truncate
743 (if (<= shift
(- sb
!vm
:n-word-bits sb
!vm
:double-float-digits
))
745 (let* ((res-hi (ash frac shift
))
746 (res (if (plusp shift
)
749 (ash (double-float-low-bits x
)
750 (- shift sb
!vm
:n-word-bits
))))
752 (declare (type (unsigned-byte 31) res-hi res
))
757 ;;; This function is called when we are doing a truncate without any funky
758 ;;; divisor, i.e. converting a float or ratio to an integer. Note that we do
759 ;;; *not* return the second value of truncate, so it must be computed by the
760 ;;; caller if needed.
762 ;;; In the float case, we pick off small arguments so that compiler
763 ;;; can use special-case operations. We use an exclusive test, since
764 ;;; (due to round-off error), (float most-positive-fixnum) is likely
765 ;;; to be equal to (1+ most-positive-fixnum). An exclusive test is
766 ;;; good enough, because most-positive-fixnum will be one less than a
767 ;;; power of two, and that power of two will be exactly representable
768 ;;; as a float (at least until we get 128-bit fixnums).
;;; The in-range branch calls %UNARY-TRUNCATE again inside TRULY-THE FIXNUM;
;;; presumably the compiler open-codes that inner call (see the paragraph
;;; above) rather than recursing -- confirm.  The general branch shifts the
;;; integer significand by the decoded exponent.
;;; NOTE(review): the integer clause, the middle operand of the range test
;;; and the sign fix-up of RES are not visible in this copy.
769 (defun %unary-truncate
(number)
770 (number-dispatch ((number real
))
772 ((ratio) (values (truncate (numerator number
) (denominator number
))))
773 (((foreach single-float double-float
#!+long-float long-float
))
774 (if (< (float most-negative-fixnum number
)
776 (float most-positive-fixnum number
))
777 (truly-the fixnum
(%unary-truncate number
))
778 (multiple-value-bind (bits exp
) (integer-decode-float number
)
779 (let ((res (ash bits exp
)))
784 ;;; Specialized versions for floats.
;;; DEF takes the float TYPE used to coerce the fixnum bounds at
;;; macroexpansion time and the NAME of the function to define.  The
;;; generated function compares NUMBER with those bounds, uses the fixnum
;;; path when in range, and otherwise shifts the decoded integer
;;; significand.
;;; NOTE(review): the %UNARY-TRUNCATE/LONG-FLOAT variant is instantiated
;;; with DOUBLE-FLOAT rather than LONG-FLOAT, so its bounds are coerced to
;;; double floats -- confirm that this is intended.
;;; NOTE(review): the middle operand of the range test, the sign fix-up of
;;; RES and any feature conditional on the long-float line are not visible
;;; in this copy.
785 (macrolet ((def (type name
)
786 `(defun ,name
(number)
787 (if (< ,(coerce sb
!xc
:most-negative-fixnum type
)
789 ,(coerce sb
!xc
:most-positive-fixnum type
))
790 (truly-the fixnum
(,name number
))
791 ;; General -- slow -- case.
792 (multiple-value-bind (bits exp
) (integer-decode-float number
)
793 (let ((res (ash bits exp
)))
797 (def single-float %unary-truncate
/single-float
)
798 (def double-float %unary-truncate
/double-float
)
800 (def double-float %unary-truncate
/long-float
))
802 ;;; Similar to %UNARY-TRUNCATE, but rounds to the nearest integer. If we
803 ;;; can't use the round primitive, then we do our own round-to-nearest on the
804 ;;; result of i-d-f. [Note that this rounding will really only happen with
805 ;;; double floats, since the whole single-float fraction will fit in a fixnum,
806 ;;; so all single-floats larger than most-positive-fixnum can be precisely
807 ;;; represented by an integer.]
;;; In the general branch SHIFTED is the integer significand shifted by the
;;; decoded exponent.  When the exponent is negative, FRACTIONAL-BITS holds
;;; the low (- EXP) bits that were shifted out and 0.5BITS is the value of
;;; exactly one half in that scale: above half rounds up, below half keeps
;;; SHIFTED, and an exact tie goes to the even neighbour.
;;; NOTE(review): the integer clause, the middle operand of the range test
;;; and the sign fix-up of the result are not visible in this copy.
808 (defun %unary-round
(number)
809 (number-dispatch ((number real
))
811 ((ratio) (values (round (numerator number
) (denominator number
))))
812 (((foreach single-float double-float
#!+long-float long-float
))
813 (if (< (float most-negative-fixnum number
)
815 (float most-positive-fixnum number
))
816 (truly-the fixnum
(%unary-round number
))
817 (multiple-value-bind (bits exp
) (integer-decode-float number
)
818 (let* ((shifted (ash bits exp
))
819 (rounded (if (minusp exp
)
820 (let ((fractional-bits (logand bits
(lognot (ash -
1 (- exp
)))))
821 (0.5bits
(ash 1 (- -
1 exp
))))
823 ((> fractional-bits
0.5bits
) (1+ shifted
))
824 ((< fractional-bits
0.5bits
) shifted
)
825 (t (if (oddp shifted
) (1+ shifted
) shifted
))))
;;; %UNARY-FTRUNCATE returns the truncated value as a float.  Integers are
;;; converted with FLOAT, ratios are truncated (numerator by denominator)
;;; and then converted, and float arguments are handed to %UNARY-FTRUNCATE
;;; itself; presumably the compiler open-codes that inner call for the
;;; specific float type rather than recursing -- confirm.
831 (defun %unary-ftruncate
(number)
832 (number-dispatch ((number real
))
833 ((integer) (float number
))
834 ((ratio) (float (truncate (numerator number
) (denominator number
))))
835 (((foreach single-float double-float
#!+long-float long-float
))
836 (%unary-ftruncate number
))))
840 "RATIONAL produces a rational number for any real numeric argument. This is
841 more efficient than RATIONALIZE, but it assumes that floating-point is
842 completely accurate, giving a result that isn't as pretty."
843 (declare (explicit-check))
844 (number-dispatch ((x real
))
845 (((foreach single-float double-float
#!+long-float long-float
))
846 (multiple-value-bind (bits exp
) (integer-decode-float x
)
849 (let* ((int (if (minusp x
) (- bits
) bits
))
850 (digits (float-digits x
))
853 (integer-/-integer int
(ash 1 (+ digits
(- ex
))))
854 (integer-/-integer
(ash int ex
) (ash 1 digits
)))))))
857 ;;; This algorithm for RATIONALIZE, due to Bruno Haible, is included
860 ;;; Algorithm (recursively presented):
861 ;;; If x is a rational number, return x.
862 ;;; If x = 0.0, return 0.
863 ;;; If x < 0.0, return (- (rationalize (- x))).
865 ;;; Call (integer-decode-float x). It returns a m,e,s=1 (mantissa,
867 ;;; If m = 0 or e >= 0: return x = m*2^e.
868 ;;; Search a rational number between a = (m-1/2)*2^e and b = (m+1/2)*2^e
869 ;;; with smallest possible numerator and denominator.
870 ;;; Note 1: If m is a power of 2, we ought to take a = (m-1/4)*2^e.
871 ;;; But in this case the result will be x itself anyway, regardless of
872 ;;; the choice of a. Therefore we can simply ignore this case.
873 ;;; Note 2: At first, we need to consider the closed interval [a,b],
874 ;;; but since a and b have the denominator 2^(|e|+1) whereas x itself
875 ;;; has a denominator <= 2^|e|, we can restrict the search to the open
877 ;;; So, for given a and b (0 < a < b) we are searching a rational number
878 ;;; y with a <= y <= b.
879 ;;; Recursive algorithm fraction_between(a,b):
882 ;;; then return c ; because a <= c < b, c integer
884 ;;; ; a is not integer (otherwise we would have had c = a < b)
885 ;;; k := c-1 ; k = floor(a), k < a < b <= k+1
886 ;;; return y = k + 1/fraction_between(1/(b-k), 1/(a-k))
887 ;;; ; note 1 <= 1/(b-k) < 1/(a-k)
889 ;;; You can see that we are actually computing a continued fraction expansion.
891 ;;; Algorithm (iterative):
892 ;;; If x is rational, return x.
893 ;;; Call (integer-decode-float x). It returns a m,e,s (mantissa,
895 ;;; If m = 0 or e >= 0, return m*2^e*s. (This includes the case x = 0.0.)
896 ;;; Create rational numbers a := (2*m-1)*2^(e-1) and b := (2*m+1)*2^(e-1)
897 ;;; (positive and already in lowest terms because the denominator is a
898 ;;; power of two and the numerator is odd).
899 ;;; Start a continued fraction expansion
900 ;;; p[-1] := 0, p[0] := 1, q[-1] := 1, q[0] := 0, i := 0.
904 ;;; then k := c-1, partial_quotient(k), (a,b) := (1/(b-k),1/(a-k)),
906 ;;; finally partial_quotient(c).
907 ;;; Here partial_quotient(c) denotes the iteration
908 ;;; i := i+1, p[i] := c*p[i-1]+p[i-2], q[i] := c*q[i-1]+q[i-2].
909 ;;; At the end, return s * (p[i]/q[i]).
910 ;;; This rational number is already in lowest terms because
911 ;;; p[i]*q[i-1]-p[i-1]*q[i] = (-1)^i.
914 ;;; Hardy, Wright: An introduction to number theory
916 ;;; <http://modular.fas.harvard.edu/edu/Fall2001/124/lectures/lecture17/lecture17/>
917 ;;; <http://modular.fas.harvard.edu/edu/Fall2001/124/lectures/lecture17/lecture18/>
919 (defun rationalize (x)
921 "Converts any REAL to a RATIONAL. Floats are converted to a simple rational
922 representation exploiting the assumption that floats are only accurate to
923 their precision. RATIONALIZE (and also RATIONAL) preserve the invariant:
924 (= x (float (rationalize x) x))"
925 (declare (explicit-check))
926 (number-dispatch ((x real
))
927 (((foreach single-float double-float
#!+long-float long-float
))
928 ;; This is a fairly straightforward implementation of the
929 ;; iterative algorithm above.
930 (multiple-value-bind (frac expo sign
)
931 (integer-decode-float x
)
932 (cond ((or (zerop frac
) (>= expo
0))
937 ;; expo < 0 and (2*m-1) and (2*m+1) are coprime to 2^(1-e),
938 ;; so build the fraction up immediately, without having to do
940 (let ((a (build-ratio (- (* 2 frac
) 1) (ash 1 (- 1 expo
))))
941 (b (build-ratio (+ (* 2 frac
) 1) (ash 1 (- 1 expo
))))
946 (do ((c (ceiling a
) (ceiling a
)))
948 (let ((top (+ (* c p1
) p0
))
949 (bot (+ (* c q1
) q0
)))
950 (build-ratio (if (minusp sign
)
956 (q2 (+ (* k q1
) q0
)))