2 * Copyright (C) 2005 Josef Cejka
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup softfloat
36 #include "conversion.h"
37 #include "comparison.h"
40 float64
convertFloat32ToFloat64(float32 a
)
45 result
.parts
.sign
= a
.parts
.sign
;
46 result
.parts
.fraction
= a
.parts
.fraction
;
47 result
.parts
.fraction
<<= (FLOAT64_FRACTION_SIZE
- FLOAT32_FRACTION_SIZE
);
49 if ((isFloat32Infinity(a
))||(isFloat32NaN(a
))) {
50 result
.parts
.exp
= 0x7FF;
51 /* TODO; check if its correct for SigNaNs*/
55 result
.parts
.exp
= a
.parts
.exp
+ ( (int)FLOAT64_BIAS
- FLOAT32_BIAS
);
56 if (a
.parts
.exp
== 0) {
57 /* normalize denormalized numbers */
59 if (result
.parts
.fraction
== 0ll) { /* fix zero */
60 result
.parts
.exp
= 0ll;
64 frac
= result
.parts
.fraction
;
66 while (!(frac
& (0x10000000000000ll
))) {
72 result
.parts
.fraction
= frac
;
79 float32
convertFloat64ToFloat32(float64 a
)
85 result
.parts
.sign
= a
.parts
.sign
;
87 if (isFloat64NaN(a
)) {
89 result
.parts
.exp
= 0xFF;
91 if (isFloat64SigNaN(a
)) {
92 result
.parts
.fraction
= 0x400000; /* set first bit of fraction nonzero */
96 result
.parts
.fraction
= 0x1; /* fraction nonzero but its first bit is zero */
100 if (isFloat64Infinity(a
)) {
101 result
.parts
.fraction
= 0;
102 result
.parts
.exp
= 0xFF;
106 exp
= (int)a
.parts
.exp
- FLOAT64_BIAS
+ FLOAT32_BIAS
;
110 result
.parts
.fraction
= 0;
111 result
.parts
.exp
= 0xFF;
114 } else if (exp
<= 0 ) {
116 /* underflow or denormalized */
118 result
.parts
.exp
= 0;
121 if (exp
> FLOAT32_FRACTION_SIZE
) {
122 /* FIXME: underflow */
123 result
.parts
.fraction
= 0;
129 frac
= a
.parts
.fraction
;
130 frac
|= 0x10000000000000ll
; /* denormalize and set hidden bit */
132 frac
>>= (FLOAT64_FRACTION_SIZE
- FLOAT32_FRACTION_SIZE
+ 1);
138 result
.parts
.fraction
= frac
;
143 result
.parts
.exp
= exp
;
144 result
.parts
.fraction
= a
.parts
.fraction
>> (FLOAT64_FRACTION_SIZE
- FLOAT32_FRACTION_SIZE
);
149 /** Helping procedure for converting float32 to uint32
150 * @param a floating point number in normalized form (no NaNs or Inf are checked )
151 * @return unsigned integer
153 static uint32_t _float32_to_uint32_helper(float32 a
)
157 if (a
.parts
.exp
< FLOAT32_BIAS
) {
162 frac
= a
.parts
.fraction
;
164 frac
|= FLOAT32_HIDDEN_BIT_MASK
;
165 /* shift fraction to left so hidden bit will be the most significant bit */
166 frac
<<= 32 - FLOAT32_FRACTION_SIZE
- 1;
168 frac
>>= 32 - (a
.parts
.exp
- FLOAT32_BIAS
) - 1;
169 if ((a
.parts
.sign
== 1) && (frac
!= 0)) {
177 /* Convert float to unsigned int32
178 * FIXME: Im not sure what to return if overflow/underflow happens
179 * - now its the biggest or the smallest int
181 uint32_t float32_to_uint32(float32 a
)
183 if (isFloat32NaN(a
)) {
187 if (isFloat32Infinity(a
) || (a
.parts
.exp
>= (32 + FLOAT32_BIAS
))) {
194 return _float32_to_uint32_helper(a
);
197 /* Convert float to signed int32
198 * FIXME: Im not sure what to return if overflow/underflow happens
199 * - now its the biggest or the smallest int
201 int32_t float32_to_int32(float32 a
)
203 if (isFloat32NaN(a
)) {
207 if (isFloat32Infinity(a
) || (a
.parts
.exp
>= (32 + FLOAT32_BIAS
))) {
213 return _float32_to_uint32_helper(a
);
217 /** Helping procedure for converting float64 to uint64
218 * @param a floating point number in normalized form (no NaNs or Inf are checked )
219 * @return unsigned integer
221 static uint64_t _float64_to_uint64_helper(float64 a
)
225 if (a
.parts
.exp
< FLOAT64_BIAS
) {
230 frac
= a
.parts
.fraction
;
232 frac
|= FLOAT64_HIDDEN_BIT_MASK
;
233 /* shift fraction to left so hidden bit will be the most significant bit */
234 frac
<<= 64 - FLOAT64_FRACTION_SIZE
- 1;
236 frac
>>= 64 - (a
.parts
.exp
- FLOAT64_BIAS
) - 1;
237 if ((a
.parts
.sign
== 1) && (frac
!= 0)) {
245 /* Convert float to unsigned int64
246 * FIXME: Im not sure what to return if overflow/underflow happens
247 * - now its the biggest or the smallest int
249 uint64_t float64_to_uint64(float64 a
)
251 if (isFloat64NaN(a
)) {
255 if (isFloat64Infinity(a
) || (a
.parts
.exp
>= (64 + FLOAT64_BIAS
))) {
262 return _float64_to_uint64_helper(a
);
265 /* Convert float to signed int64
266 * FIXME: Im not sure what to return if overflow/underflow happens
267 * - now its the biggest or the smallest int
269 int64_t float64_to_int64(float64 a
)
271 if (isFloat64NaN(a
)) {
275 if (isFloat64Infinity(a
) || (a
.parts
.exp
>= (64 + FLOAT64_BIAS
))) {
281 return _float64_to_uint64_helper(a
);
288 /** Helping procedure for converting float32 to uint64
289 * @param a floating point number in normalized form (no NaNs or Inf are checked )
290 * @return unsigned integer
292 static uint64_t _float32_to_uint64_helper(float32 a
)
296 if (a
.parts
.exp
< FLOAT32_BIAS
) {
301 frac
= a
.parts
.fraction
;
303 frac
|= FLOAT32_HIDDEN_BIT_MASK
;
304 /* shift fraction to left so hidden bit will be the most significant bit */
305 frac
<<= 64 - FLOAT32_FRACTION_SIZE
- 1;
307 frac
>>= 64 - (a
.parts
.exp
- FLOAT32_BIAS
) - 1;
308 if ((a
.parts
.sign
== 1) && (frac
!= 0)) {
316 /* Convert float to unsigned int64
317 * FIXME: Im not sure what to return if overflow/underflow happens
318 * - now its the biggest or the smallest int
320 uint64_t float32_to_uint64(float32 a
)
322 if (isFloat32NaN(a
)) {
326 if (isFloat32Infinity(a
) || (a
.parts
.exp
>= (64 + FLOAT32_BIAS
))) {
333 return _float32_to_uint64_helper(a
);
336 /* Convert float to signed int64
337 * FIXME: Im not sure what to return if overflow/underflow happens
338 * - now its the biggest or the smallest int
340 int64_t float32_to_int64(float32 a
)
342 if (isFloat32NaN(a
)) {
346 if (isFloat32Infinity(a
) || (a
.parts
.exp
>= (64 + FLOAT32_BIAS
))) {
352 return _float32_to_uint64_helper(a
);
356 /* Convert float64 to unsigned int32
357 * FIXME: Im not sure what to return if overflow/underflow happens
358 * - now its the biggest or the smallest int
360 uint32_t float64_to_uint32(float64 a
)
362 if (isFloat64NaN(a
)) {
366 if (isFloat64Infinity(a
) || (a
.parts
.exp
>= (32 + FLOAT64_BIAS
))) {
373 return (uint32_t)_float64_to_uint64_helper(a
);
376 /* Convert float64 to signed int32
377 * FIXME: Im not sure what to return if overflow/underflow happens
378 * - now its the biggest or the smallest int
380 int32_t float64_to_int32(float64 a
)
382 if (isFloat64NaN(a
)) {
386 if (isFloat64Infinity(a
) || (a
.parts
.exp
>= (32 + FLOAT64_BIAS
))) {
392 return (int32_t)_float64_to_uint64_helper(a
);
395 /** Convert unsigned integer to float32
399 float32
uint32_to_float32(uint32_t i
)
405 result
.parts
.sign
= 0;
406 result
.parts
.fraction
= 0;
408 counter
= countZeroes32(i
);
410 exp
= FLOAT32_BIAS
+ 32 - counter
- 1;
423 roundFloat32(&exp
, &i
);
425 result
.parts
.fraction
= i
>> 7;
426 result
.parts
.exp
= exp
;
431 float32
int32_to_float32(int32_t i
)
436 result
= uint32_to_float32((uint32_t)(-i
));
438 result
= uint32_to_float32((uint32_t)i
);
441 result
.parts
.sign
= i
< 0;
447 float32
uint64_to_float32(uint64_t i
)
454 result
.parts
.sign
= 0;
455 result
.parts
.fraction
= 0;
457 counter
= countZeroes64(i
);
459 exp
= FLOAT32_BIAS
+ 64 - counter
- 1;
466 /* Shift all to the first 31 bits (31. will be hidden 1)*/
468 i
<<= counter
- 1 - 32;
470 i
>>= 1 + 32 - counter
;
474 roundFloat32(&exp
, &j
);
476 result
.parts
.fraction
= j
>> 7;
477 result
.parts
.exp
= exp
;
481 float32
int64_to_float32(int64_t i
)
486 result
= uint64_to_float32((uint64_t)(-i
));
488 result
= uint64_to_float32((uint64_t)i
);
491 result
.parts
.sign
= i
< 0;
496 /** Convert unsigned integer to float64
500 float64
uint32_to_float64(uint32_t i
)
507 result
.parts
.sign
= 0;
508 result
.parts
.fraction
= 0;
510 counter
= countZeroes32(i
);
512 exp
= FLOAT64_BIAS
+ 32 - counter
- 1;
520 frac
<<= counter
+ 32 - 1;
522 roundFloat64(&exp
, &frac
);
524 result
.parts
.fraction
= frac
>> 10;
525 result
.parts
.exp
= exp
;
530 float64
int32_to_float64(int32_t i
)
535 result
= uint32_to_float64((uint32_t)(-i
));
537 result
= uint32_to_float64((uint32_t)i
);
540 result
.parts
.sign
= i
< 0;
546 float64
uint64_to_float64(uint64_t i
)
552 result
.parts
.sign
= 0;
553 result
.parts
.fraction
= 0;
555 counter
= countZeroes64(i
);
557 exp
= FLOAT64_BIAS
+ 64 - counter
- 1;
570 roundFloat64(&exp
, &i
);
572 result
.parts
.fraction
= i
>> 10;
573 result
.parts
.exp
= exp
;
577 float64
int64_to_float64(int64_t i
)
582 result
= uint64_to_float64((uint64_t)(-i
));
584 result
= uint64_to_float64((uint64_t)i
);
587 result
.parts
.sign
= i
< 0;