uspace/softfloat/generic/conversion.c

   1 /*
   2  * Copyright (C) 2005 Josef Cejka
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * - Redistributions of source code must retain the above copyright
  10  *   notice, this list of conditions and the following disclaimer.
  11  * - Redistributions in binary form must reproduce the above copyright
  12  *   notice, this list of conditions and the following disclaimer in the
  13  *   documentation and/or other materials provided with the distribution.
  14  * - The name of the author may not be used to endorse or promote products
  15  *   derived from this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /** @addtogroup softfloat
  30  * @{
  31  */
  32 /** @file
  33  */
  34
  35 #include "sftypes.h"
  36 #include "conversion.h"
  37 #include "comparison.h"
  38 #include "common.h"
  39
  40 float64 convertFloat32ToFloat64(float32 a)
  41 {
  42         float64 result;
  43         uint64_t frac;
  44
  45         result.parts.sign = a.parts.sign;
  46         result.parts.fraction = a.parts.fraction;
  47         result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE );
  48
  49         if ((isFloat32Infinity(a))||(isFloat32NaN(a))) {
  50                 result.parts.exp = 0x7FF;
  51                 /* TODO; check if its correct for SigNaNs*/
  52                 return result;
  53         };
  54
  55         result.parts.exp = a.parts.exp + ( (int)FLOAT64_BIAS - FLOAT32_BIAS );
  56         if (a.parts.exp == 0) {
  57                 /* normalize denormalized numbers */
  58
  59                 if (result.parts.fraction == 0ll) { /* fix zero */
  60                         result.parts.exp = 0ll;
  61                         return result;
  62                 }
  63
  64                 frac = result.parts.fraction;
  65
  66                 while (!(frac & (0x10000000000000ll))) {
  67                         frac <<= 1;
  68                         --result.parts.exp;
  69                 };
  70
  71                 ++result.parts.exp;
  72                 result.parts.fraction = frac;
  73         };
  74
  75         return result;
  76
  77 }
  78
  79 float32 convertFloat64ToFloat32(float64 a)
  80 {
  81         float32 result;
  82         int32_t exp;
  83         uint64_t frac;
  84
  85         result.parts.sign = a.parts.sign;
  86
  87         if (isFloat64NaN(a)) {
  88
  89                 result.parts.exp = 0xFF;
  90
  91                 if (isFloat64SigNaN(a)) {
  92                         result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */
  93                         return result;
  94                 }
  95
  96                 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */
  97                 return result;
  98         };
  99
 100         if (isFloat64Infinity(a)) {
 101                 result.parts.fraction = 0;
 102                 result.parts.exp = 0xFF;
 103                 return result;
 104         };
 105
 106         exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
 107
 108         if (exp >= 0xFF) {
 109                 /*FIXME: overflow*/
 110                 result.parts.fraction = 0;
 111                 result.parts.exp = 0xFF;
 112                 return result;
 113
 114         } else if (exp <= 0 ) {
 115
 116                 /* underflow or denormalized */
 117
 118                 result.parts.exp = 0;
 119
 120                 exp *= -1;
 121                 if (exp > FLOAT32_FRACTION_SIZE ) {
 122                         /* FIXME: underflow */
 123                         result.parts.fraction = 0;
 124                         return result;
 125                 };
 126
 127                 /* denormalized */
 128
 129                 frac = a.parts.fraction;
 130                 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */
 131
 132                 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
 133
 134                 while (exp > 0) {
 135                         --exp;
 136                         frac >>= 1;
 137                 };
 138                 result.parts.fraction = frac;
 139
 140                 return result;
 141         };
 142
 143         result.parts.exp = exp;
 144         result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
 145         return result;
 146 }
 147
 148
 149 /** Helping procedure for converting float32 to uint32
 150  * @param a floating point number in normalized form (no NaNs or Inf are checked )
 151  * @return unsigned integer
 152  */
 153 static uint32_t _float32_to_uint32_helper(float32 a)
 154 {
 155         uint32_t frac;
 156
 157         if (a.parts.exp < FLOAT32_BIAS) {
 158                 /*TODO: rounding*/
 159                 return 0;
 160         }
 161
 162         frac = a.parts.fraction;
 163
 164         frac |= FLOAT32_HIDDEN_BIT_MASK;
 165         /* shift fraction to left so hidden bit will be the most significant bit */
 166         frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
 167
 168         frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
 169         if ((a.parts.sign == 1) && (frac != 0)) {
 170                 frac = ~frac;
 171                 ++frac;
 172         }
 173
 174         return frac;
 175 }
 176
 177 /* Convert float to unsigned int32
 178  * FIXME: Im not sure what to return if overflow/underflow happens
 179  *      - now its the biggest or the smallest int
 180  */
 181 uint32_t float32_to_uint32(float32 a)
 182 {
 183         if (isFloat32NaN(a)) {
 184                 return MAX_UINT32;
 185         }
 186
 187         if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS)))  {
 188                 if (a.parts.sign) {
 189                         return MIN_UINT32;
 190                 }
 191                 return MAX_UINT32;
 192         }
 193
 194         return _float32_to_uint32_helper(a);
 195 }
 196
 197 /* Convert float to signed int32
 198  * FIXME: Im not sure what to return if overflow/underflow happens
 199  *      - now its the biggest or the smallest int
 200  */
 201 int32_t float32_to_int32(float32 a)
 202 {
 203         if (isFloat32NaN(a)) {
 204                 return MAX_INT32;
 205         }
 206
 207         if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS)))  {
 208                 if (a.parts.sign) {
 209                         return MIN_INT32;
 210                 }
 211                 return MAX_INT32;
 212         }
 213         return _float32_to_uint32_helper(a);
 214 }
 215
 216
 217 /** Helping procedure for converting float64 to uint64
 218  * @param a floating point number in normalized form (no NaNs or Inf are checked )
 219  * @return unsigned integer
 220  */
 221 static uint64_t _float64_to_uint64_helper(float64 a)
 222 {
 223         uint64_t frac;
 224
 225         if (a.parts.exp < FLOAT64_BIAS) {
 226                 /*TODO: rounding*/
 227                 return 0;
 228         }
 229
 230         frac = a.parts.fraction;
 231
 232         frac |= FLOAT64_HIDDEN_BIT_MASK;
 233         /* shift fraction to left so hidden bit will be the most significant bit */
 234         frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
 235
 236         frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
 237         if ((a.parts.sign == 1) && (frac != 0)) {
 238                 frac = ~frac;
 239                 ++frac;
 240         }
 241
 242         return frac;
 243 }
 244
 245 /* Convert float to unsigned int64
 246  * FIXME: Im not sure what to return if overflow/underflow happens
 247  *      - now its the biggest or the smallest int
 248  */
 249 uint64_t float64_to_uint64(float64 a)
 250 {
 251         if (isFloat64NaN(a)) {
 252                 return MAX_UINT64;
 253         }
 254
 255         if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS)))  {
 256                 if (a.parts.sign) {
 257                         return MIN_UINT64;
 258                 }
 259                 return MAX_UINT64;
 260         }
 261
 262         return _float64_to_uint64_helper(a);
 263 }
 264
 265 /* Convert float to signed int64
 266  * FIXME: Im not sure what to return if overflow/underflow happens
 267  *      - now its the biggest or the smallest int
 268  */
 269 int64_t float64_to_int64(float64 a)
 270 {
 271         if (isFloat64NaN(a)) {
 272                 return MAX_INT64;
 273         }
 274
 275         if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS)))  {
 276                 if (a.parts.sign) {
 277                         return MIN_INT64;
 278                 }
 279                 return MAX_INT64;
 280         }
 281         return _float64_to_uint64_helper(a);
 282 }
 283
 284
 285
 286
 287
 288 /** Helping procedure for converting float32 to uint64
 289  * @param a floating point number in normalized form (no NaNs or Inf are checked )
 290  * @return unsigned integer
 291  */
 292 static uint64_t _float32_to_uint64_helper(float32 a)
 293 {
 294         uint64_t frac;
 295
 296         if (a.parts.exp < FLOAT32_BIAS) {
 297                 /*TODO: rounding*/
 298                 return 0;
 299         }
 300
 301         frac = a.parts.fraction;
 302
 303         frac |= FLOAT32_HIDDEN_BIT_MASK;
 304         /* shift fraction to left so hidden bit will be the most significant bit */
 305         frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
 306
 307         frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
 308         if ((a.parts.sign == 1) && (frac != 0)) {
 309                 frac = ~frac;
 310                 ++frac;
 311         }
 312
 313         return frac;
 314 }
 315
 316 /* Convert float to unsigned int64
 317  * FIXME: Im not sure what to return if overflow/underflow happens
 318  *      - now its the biggest or the smallest int
 319  */
 320 uint64_t float32_to_uint64(float32 a)
 321 {
 322         if (isFloat32NaN(a)) {
 323                 return MAX_UINT64;
 324         }
 325
 326         if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS)))  {
 327                 if (a.parts.sign) {
 328                         return MIN_UINT64;
 329                 }
 330                 return MAX_UINT64;
 331         }
 332
 333         return _float32_to_uint64_helper(a);
 334 }
 335
 336 /* Convert float to signed int64
 337  * FIXME: Im not sure what to return if overflow/underflow happens
 338  *      - now its the biggest or the smallest int
 339  */
 340 int64_t float32_to_int64(float32 a)
 341 {
 342         if (isFloat32NaN(a)) {
 343                 return MAX_INT64;
 344         }
 345
 346         if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS)))  {
 347                 if (a.parts.sign) {
 348                         return (MIN_INT64);
 349                 }
 350                 return MAX_INT64;
 351         }
 352         return _float32_to_uint64_helper(a);
 353 }
 354
 355
 356 /* Convert float64 to unsigned int32
 357  * FIXME: Im not sure what to return if overflow/underflow happens
 358  *      - now its the biggest or the smallest int
 359  */
 360 uint32_t float64_to_uint32(float64 a)
 361 {
 362         if (isFloat64NaN(a)) {
 363                 return MAX_UINT32;
 364         }
 365
 366         if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS)))  {
 367                 if (a.parts.sign) {
 368                         return MIN_UINT32;
 369                 }
 370                 return MAX_UINT32;
 371         }
 372
 373         return (uint32_t)_float64_to_uint64_helper(a);
 374 }
 375
 376 /* Convert float64 to signed int32
 377  * FIXME: Im not sure what to return if overflow/underflow happens
 378  *      - now its the biggest or the smallest int
 379  */
 380 int32_t float64_to_int32(float64 a)
 381 {
 382         if (isFloat64NaN(a)) {
 383                 return MAX_INT32;
 384         }
 385
 386         if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS)))  {
 387                 if (a.parts.sign) {
 388                         return MIN_INT32;
 389                 }
 390                 return MAX_INT32;
 391         }
 392         return (int32_t)_float64_to_uint64_helper(a);
 393 }
 394
 395 /** Convert unsigned integer to float32
 396  *
 397  *
 398  */
 399 float32 uint32_to_float32(uint32_t i)
 400 {
 401         int counter;
 402         int32_t exp;
 403         float32 result;
 404
 405         result.parts.sign = 0;
 406         result.parts.fraction = 0;
 407
 408         counter = countZeroes32(i);
 409
 410         exp = FLOAT32_BIAS + 32 - counter - 1;
 411
 412         if (counter == 32) {
 413                 result.binary = 0;
 414                 return result;
 415         }
 416
 417         if (counter > 0) {
 418                 i <<= counter - 1;
 419         } else {
 420                 i >>= 1;
 421         }
 422
 423         roundFloat32(&exp, &i);
 424
 425         result.parts.fraction = i >> 7;
 426         result.parts.exp = exp;
 427
 428         return result;
 429 }
 430
 431 float32 int32_to_float32(int32_t i)
 432 {
 433         float32 result;
 434
 435         if (i < 0) {
 436                 result = uint32_to_float32((uint32_t)(-i));
 437         } else {
 438                 result = uint32_to_float32((uint32_t)i);
 439         }
 440
 441         result.parts.sign = i < 0;
 442
 443         return result;
 444 }
 445
 446
 447 float32 uint64_to_float32(uint64_t i)
 448 {
 449         int counter;
 450         int32_t exp;
 451         uint32_t j;
 452         float32 result;
 453
 454         result.parts.sign = 0;
 455         result.parts.fraction = 0;
 456
 457         counter = countZeroes64(i);
 458
 459         exp = FLOAT32_BIAS + 64 - counter - 1;
 460
 461         if (counter == 64) {
 462                 result.binary = 0;
 463                 return result;
 464         }
 465
 466         /* Shift all to the first 31 bits (31. will be hidden 1)*/
 467         if (counter > 33) {
 468                 i <<= counter - 1 - 32;
 469         } else {
 470                 i >>= 1 + 32 - counter;
 471         }
 472
 473         j = (uint32_t)i;
 474         roundFloat32(&exp, &j);
 475
 476         result.parts.fraction = j >> 7;
 477         result.parts.exp = exp;
 478         return result;
 479 }
 480
 481 float32 int64_to_float32(int64_t i)
 482 {
 483         float32 result;
 484
 485         if (i < 0) {
 486                 result = uint64_to_float32((uint64_t)(-i));
 487         } else {
 488                 result = uint64_to_float32((uint64_t)i);
 489         }
 490
 491         result.parts.sign = i < 0;
 492
 493         return result;
 494 }
 495
 496 /** Convert unsigned integer to float64
 497  *
 498  *
 499  */
 500 float64 uint32_to_float64(uint32_t i)
 501 {
 502         int counter;
 503         int32_t exp;
 504         float64 result;
 505         uint64_t frac;
 506
 507         result.parts.sign = 0;
 508         result.parts.fraction = 0;
 509
 510         counter = countZeroes32(i);
 511
 512         exp = FLOAT64_BIAS + 32 - counter - 1;
 513
 514         if (counter == 32) {
 515                 result.binary = 0;
 516                 return result;
 517         }
 518
 519         frac = i;
 520         frac <<= counter + 32 - 1;
 521
 522         roundFloat64(&exp, &frac);
 523
 524         result.parts.fraction = frac >> 10;
 525         result.parts.exp = exp;
 526
 527         return result;
 528 }
 529
 530 float64 int32_to_float64(int32_t i)
 531 {
 532         float64 result;
 533
 534         if (i < 0) {
 535                 result = uint32_to_float64((uint32_t)(-i));
 536         } else {
 537                 result = uint32_to_float64((uint32_t)i);
 538         }
 539
 540         result.parts.sign = i < 0;
 541
 542         return result;
 543 }
 544
 545
 546 float64 uint64_to_float64(uint64_t i)
 547 {
 548         int counter;
 549         int32_t exp;
 550         float64 result;
 551
 552         result.parts.sign = 0;
 553         result.parts.fraction = 0;
 554
 555         counter = countZeroes64(i);
 556
 557         exp = FLOAT64_BIAS + 64 - counter - 1;
 558
 559         if (counter == 64) {
 560                 result.binary = 0;
 561                 return result;
 562         }
 563
 564         if (counter > 0) {
 565                 i <<= counter - 1;
 566         } else {
 567                 i >>= 1;
 568         }
 569
 570         roundFloat64(&exp, &i);
 571
 572         result.parts.fraction = i >> 10;
 573         result.parts.exp = exp;
 574         return result;
 575 }
 576
 577 float64 int64_to_float64(int64_t i)
 578 {
 579         float64 result;
 580
 581         if (i < 0) {
 582                 result = uint64_to_float64((uint64_t)(-i));
 583         } else {
 584                 result = uint64_to_float64((uint64_t)i);
 585         }
 586
 587         result.parts.sign = i < 0;
 588
 589         return result;
 590 }
 591
 592 /** @}
 593  */