More formatting changes.
[helenos.git] / uspace / softfloat / generic / conversion.c
blob0534b38cf0e0a259dba93be89f2b3726e58cc7d9
1 /*
2 * Copyright (C) 2005 Josef Cejka
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup softfloat
30 * @{
32 /** @file
35 #include "sftypes.h"
36 #include "conversion.h"
37 #include "comparison.h"
38 #include "common.h"
40 float64 convertFloat32ToFloat64(float32 a)
42 float64 result;
43 uint64_t frac;
45 result.parts.sign = a.parts.sign;
46 result.parts.fraction = a.parts.fraction;
47 result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE );
49 if ((isFloat32Infinity(a))||(isFloat32NaN(a))) {
50 result.parts.exp = 0x7FF;
51 /* TODO; check if its correct for SigNaNs*/
52 return result;
55 result.parts.exp = a.parts.exp + ( (int)FLOAT64_BIAS - FLOAT32_BIAS );
56 if (a.parts.exp == 0) {
57 /* normalize denormalized numbers */
59 if (result.parts.fraction == 0ll) { /* fix zero */
60 result.parts.exp = 0ll;
61 return result;
64 frac = result.parts.fraction;
66 while (!(frac & (0x10000000000000ll))) {
67 frac <<= 1;
68 --result.parts.exp;
71 ++result.parts.exp;
72 result.parts.fraction = frac;
75 return result;
79 float32 convertFloat64ToFloat32(float64 a)
81 float32 result;
82 int32_t exp;
83 uint64_t frac;
85 result.parts.sign = a.parts.sign;
87 if (isFloat64NaN(a)) {
89 result.parts.exp = 0xFF;
91 if (isFloat64SigNaN(a)) {
92 result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */
93 return result;
96 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */
97 return result;
100 if (isFloat64Infinity(a)) {
101 result.parts.fraction = 0;
102 result.parts.exp = 0xFF;
103 return result;
106 exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
108 if (exp >= 0xFF) {
109 /*FIXME: overflow*/
110 result.parts.fraction = 0;
111 result.parts.exp = 0xFF;
112 return result;
114 } else if (exp <= 0 ) {
116 /* underflow or denormalized */
118 result.parts.exp = 0;
120 exp *= -1;
121 if (exp > FLOAT32_FRACTION_SIZE ) {
122 /* FIXME: underflow */
123 result.parts.fraction = 0;
124 return result;
127 /* denormalized */
129 frac = a.parts.fraction;
130 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */
132 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
134 while (exp > 0) {
135 --exp;
136 frac >>= 1;
138 result.parts.fraction = frac;
140 return result;
143 result.parts.exp = exp;
144 result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
145 return result;
149 /** Helping procedure for converting float32 to uint32
150 * @param a floating point number in normalized form (no NaNs or Inf are checked )
151 * @return unsigned integer
153 static uint32_t _float32_to_uint32_helper(float32 a)
155 uint32_t frac;
157 if (a.parts.exp < FLOAT32_BIAS) {
158 /*TODO: rounding*/
159 return 0;
162 frac = a.parts.fraction;
164 frac |= FLOAT32_HIDDEN_BIT_MASK;
165 /* shift fraction to left so hidden bit will be the most significant bit */
166 frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
168 frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
169 if ((a.parts.sign == 1) && (frac != 0)) {
170 frac = ~frac;
171 ++frac;
174 return frac;
177 /* Convert float to unsigned int32
178 * FIXME: Im not sure what to return if overflow/underflow happens
179 * - now its the biggest or the smallest int
181 uint32_t float32_to_uint32(float32 a)
183 if (isFloat32NaN(a)) {
184 return MAX_UINT32;
187 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
188 if (a.parts.sign) {
189 return MIN_UINT32;
191 return MAX_UINT32;
194 return _float32_to_uint32_helper(a);
197 /* Convert float to signed int32
198 * FIXME: Im not sure what to return if overflow/underflow happens
199 * - now its the biggest or the smallest int
201 int32_t float32_to_int32(float32 a)
203 if (isFloat32NaN(a)) {
204 return MAX_INT32;
207 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
208 if (a.parts.sign) {
209 return MIN_INT32;
211 return MAX_INT32;
213 return _float32_to_uint32_helper(a);
217 /** Helping procedure for converting float64 to uint64
218 * @param a floating point number in normalized form (no NaNs or Inf are checked )
219 * @return unsigned integer
221 static uint64_t _float64_to_uint64_helper(float64 a)
223 uint64_t frac;
225 if (a.parts.exp < FLOAT64_BIAS) {
226 /*TODO: rounding*/
227 return 0;
230 frac = a.parts.fraction;
232 frac |= FLOAT64_HIDDEN_BIT_MASK;
233 /* shift fraction to left so hidden bit will be the most significant bit */
234 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
236 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
237 if ((a.parts.sign == 1) && (frac != 0)) {
238 frac = ~frac;
239 ++frac;
242 return frac;
245 /* Convert float to unsigned int64
246 * FIXME: Im not sure what to return if overflow/underflow happens
247 * - now its the biggest or the smallest int
249 uint64_t float64_to_uint64(float64 a)
251 if (isFloat64NaN(a)) {
252 return MAX_UINT64;
255 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
256 if (a.parts.sign) {
257 return MIN_UINT64;
259 return MAX_UINT64;
262 return _float64_to_uint64_helper(a);
265 /* Convert float to signed int64
266 * FIXME: Im not sure what to return if overflow/underflow happens
267 * - now its the biggest or the smallest int
269 int64_t float64_to_int64(float64 a)
271 if (isFloat64NaN(a)) {
272 return MAX_INT64;
275 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
276 if (a.parts.sign) {
277 return MIN_INT64;
279 return MAX_INT64;
281 return _float64_to_uint64_helper(a);
288 /** Helping procedure for converting float32 to uint64
289 * @param a floating point number in normalized form (no NaNs or Inf are checked )
290 * @return unsigned integer
292 static uint64_t _float32_to_uint64_helper(float32 a)
294 uint64_t frac;
296 if (a.parts.exp < FLOAT32_BIAS) {
297 /*TODO: rounding*/
298 return 0;
301 frac = a.parts.fraction;
303 frac |= FLOAT32_HIDDEN_BIT_MASK;
304 /* shift fraction to left so hidden bit will be the most significant bit */
305 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
307 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
308 if ((a.parts.sign == 1) && (frac != 0)) {
309 frac = ~frac;
310 ++frac;
313 return frac;
316 /* Convert float to unsigned int64
317 * FIXME: Im not sure what to return if overflow/underflow happens
318 * - now its the biggest or the smallest int
320 uint64_t float32_to_uint64(float32 a)
322 if (isFloat32NaN(a)) {
323 return MAX_UINT64;
326 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
327 if (a.parts.sign) {
328 return MIN_UINT64;
330 return MAX_UINT64;
333 return _float32_to_uint64_helper(a);
336 /* Convert float to signed int64
337 * FIXME: Im not sure what to return if overflow/underflow happens
338 * - now its the biggest or the smallest int
340 int64_t float32_to_int64(float32 a)
342 if (isFloat32NaN(a)) {
343 return MAX_INT64;
346 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
347 if (a.parts.sign) {
348 return (MIN_INT64);
350 return MAX_INT64;
352 return _float32_to_uint64_helper(a);
356 /* Convert float64 to unsigned int32
357 * FIXME: Im not sure what to return if overflow/underflow happens
358 * - now its the biggest or the smallest int
360 uint32_t float64_to_uint32(float64 a)
362 if (isFloat64NaN(a)) {
363 return MAX_UINT32;
366 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
367 if (a.parts.sign) {
368 return MIN_UINT32;
370 return MAX_UINT32;
373 return (uint32_t)_float64_to_uint64_helper(a);
376 /* Convert float64 to signed int32
377 * FIXME: Im not sure what to return if overflow/underflow happens
378 * - now its the biggest or the smallest int
380 int32_t float64_to_int32(float64 a)
382 if (isFloat64NaN(a)) {
383 return MAX_INT32;
386 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
387 if (a.parts.sign) {
388 return MIN_INT32;
390 return MAX_INT32;
392 return (int32_t)_float64_to_uint64_helper(a);
395 /** Convert unsigned integer to float32
399 float32 uint32_to_float32(uint32_t i)
401 int counter;
402 int32_t exp;
403 float32 result;
405 result.parts.sign = 0;
406 result.parts.fraction = 0;
408 counter = countZeroes32(i);
410 exp = FLOAT32_BIAS + 32 - counter - 1;
412 if (counter == 32) {
413 result.binary = 0;
414 return result;
417 if (counter > 0) {
418 i <<= counter - 1;
419 } else {
420 i >>= 1;
423 roundFloat32(&exp, &i);
425 result.parts.fraction = i >> 7;
426 result.parts.exp = exp;
428 return result;
431 float32 int32_to_float32(int32_t i)
433 float32 result;
435 if (i < 0) {
436 result = uint32_to_float32((uint32_t)(-i));
437 } else {
438 result = uint32_to_float32((uint32_t)i);
441 result.parts.sign = i < 0;
443 return result;
447 float32 uint64_to_float32(uint64_t i)
449 int counter;
450 int32_t exp;
451 uint32_t j;
452 float32 result;
454 result.parts.sign = 0;
455 result.parts.fraction = 0;
457 counter = countZeroes64(i);
459 exp = FLOAT32_BIAS + 64 - counter - 1;
461 if (counter == 64) {
462 result.binary = 0;
463 return result;
466 /* Shift all to the first 31 bits (31. will be hidden 1)*/
467 if (counter > 33) {
468 i <<= counter - 1 - 32;
469 } else {
470 i >>= 1 + 32 - counter;
473 j = (uint32_t)i;
474 roundFloat32(&exp, &j);
476 result.parts.fraction = j >> 7;
477 result.parts.exp = exp;
478 return result;
481 float32 int64_to_float32(int64_t i)
483 float32 result;
485 if (i < 0) {
486 result = uint64_to_float32((uint64_t)(-i));
487 } else {
488 result = uint64_to_float32((uint64_t)i);
491 result.parts.sign = i < 0;
493 return result;
496 /** Convert unsigned integer to float64
500 float64 uint32_to_float64(uint32_t i)
502 int counter;
503 int32_t exp;
504 float64 result;
505 uint64_t frac;
507 result.parts.sign = 0;
508 result.parts.fraction = 0;
510 counter = countZeroes32(i);
512 exp = FLOAT64_BIAS + 32 - counter - 1;
514 if (counter == 32) {
515 result.binary = 0;
516 return result;
519 frac = i;
520 frac <<= counter + 32 - 1;
522 roundFloat64(&exp, &frac);
524 result.parts.fraction = frac >> 10;
525 result.parts.exp = exp;
527 return result;
530 float64 int32_to_float64(int32_t i)
532 float64 result;
534 if (i < 0) {
535 result = uint32_to_float64((uint32_t)(-i));
536 } else {
537 result = uint32_to_float64((uint32_t)i);
540 result.parts.sign = i < 0;
542 return result;
546 float64 uint64_to_float64(uint64_t i)
548 int counter;
549 int32_t exp;
550 float64 result;
552 result.parts.sign = 0;
553 result.parts.fraction = 0;
555 counter = countZeroes64(i);
557 exp = FLOAT64_BIAS + 64 - counter - 1;
559 if (counter == 64) {
560 result.binary = 0;
561 return result;
564 if (counter > 0) {
565 i <<= counter - 1;
566 } else {
567 i >>= 1;
570 roundFloat64(&exp, &i);
572 result.parts.fraction = i >> 10;
573 result.parts.exp = exp;
574 return result;
577 float64 int64_to_float64(int64_t i)
579 float64 result;
581 if (i < 0) {
582 result = uint64_to_float64((uint64_t)(-i));
583 } else {
584 result = uint64_to_float64((uint64_t)i);
587 result.parts.sign = i < 0;
589 return result;
592 /** @}