Committer: Michael Beasley <mike@snafu.setup>
[mikesnafu-overlay.git] / arch / sh / kernel / cpu / sh4 / softfloat.c
blob7b2d337ee4121dacf34f6e39648d201b5494bb8e
1 /*
2 * Floating point emulation support for subnormalised numbers on SH4
3 * architecture This file is derived from the SoftFloat IEC/IEEE
4 * Floating-point Arithmetic Package, Release 2 the original license of
5 * which is reproduced below.
7 * ========================================================================
9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 * Arithmetic Package, Release 2.
12 * Written by John R. Hauser. This work was made possible in part by the
13 * International Computer Science Institute, located at Suite 600, 1947 Center
14 * Street, Berkeley, California 94704. Funding was partially provided by the
15 * National Science Foundation under grant MIP-9311980. The original version
16 * of this code was written as part of a project to build a fixed-point vector
17 * processor in collaboration with the University of California at Berkeley,
18 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20 * arithmetic/softfloat.html'.
22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
28 * Derivative works are acceptable, even for commercial purposes, so long as
29 * (1) they include prominent notice that the work is derivative, and (2) they
30 * include prominent notice akin to these three paragraphs for those parts of
31 * this code that are retained.
33 * ========================================================================
35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36 * and Kamel Khelifi <kamel.khelifi@st.com>
38 #include <linux/kernel.h>
39 #include <asm/cpu/fpu.h>
41 #define LIT64( a ) a##LL
43 typedef char flag;
44 typedef unsigned char uint8;
45 typedef signed char int8;
46 typedef int uint16;
47 typedef int int16;
48 typedef unsigned int uint32;
49 typedef signed int int32;
51 typedef unsigned long long int bits64;
52 typedef signed long long int sbits64;
54 typedef unsigned char bits8;
55 typedef signed char sbits8;
56 typedef unsigned short int bits16;
57 typedef signed short int sbits16;
58 typedef unsigned int bits32;
59 typedef signed int sbits32;
61 typedef unsigned long long int uint64;
62 typedef signed long long int int64;
64 typedef unsigned long int float32;
65 typedef unsigned long long float64;
67 extern void float_raise(unsigned int flags); /* in fpu.c */
68 extern int float_rounding_mode(void); /* in fpu.c */
70 inline bits64 extractFloat64Frac(float64 a);
71 inline flag extractFloat64Sign(float64 a);
72 inline int16 extractFloat64Exp(float64 a);
73 inline int16 extractFloat32Exp(float32 a);
74 inline flag extractFloat32Sign(float32 a);
75 inline bits32 extractFloat32Frac(float32 a);
76 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
77 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
78 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
79 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
80 float64 float64_sub(float64 a, float64 b);
81 float32 float32_sub(float32 a, float32 b);
82 float32 float32_add(float32 a, float32 b);
83 float64 float64_add(float64 a, float64 b);
84 float64 float64_div(float64 a, float64 b);
85 float32 float32_div(float32 a, float32 b);
86 float32 float32_mul(float32 a, float32 b);
87 float64 float64_mul(float64 a, float64 b);
88 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
89 bits64 * z1Ptr);
90 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 bits64 * z1Ptr);
92 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
94 static int8 countLeadingZeros32(bits32 a);
95 static int8 countLeadingZeros64(bits64 a);
96 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
97 bits64 zSig);
98 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
99 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
100 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
101 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
102 bits32 zSig);
103 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
104 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
105 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
106 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
107 bits64 * zSigPtr);
108 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
109 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
110 bits32 * zSigPtr);
112 inline bits64 extractFloat64Frac(float64 a)
114 return a & LIT64(0x000FFFFFFFFFFFFF);
117 inline flag extractFloat64Sign(float64 a)
119 return a >> 63;
122 inline int16 extractFloat64Exp(float64 a)
124 return (a >> 52) & 0x7FF;
127 inline int16 extractFloat32Exp(float32 a)
129 return (a >> 23) & 0xFF;
132 inline flag extractFloat32Sign(float32 a)
134 return a >> 31;
137 inline bits32 extractFloat32Frac(float32 a)
139 return a & 0x007FFFFF;
142 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
144 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
147 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
149 bits64 z;
151 if (count == 0) {
152 z = a;
153 } else if (count < 64) {
154 z = (a >> count) | ((a << ((-count) & 63)) != 0);
155 } else {
156 z = (a != 0);
158 *zPtr = z;
161 static int8 countLeadingZeros32(bits32 a)
163 static const int8 countLeadingZerosHigh[] = {
164 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
165 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
181 int8 shiftCount;
183 shiftCount = 0;
184 if (a < 0x10000) {
185 shiftCount += 16;
186 a <<= 16;
188 if (a < 0x1000000) {
189 shiftCount += 8;
190 a <<= 8;
192 shiftCount += countLeadingZerosHigh[a >> 24];
193 return shiftCount;
197 static int8 countLeadingZeros64(bits64 a)
199 int8 shiftCount;
201 shiftCount = 0;
202 if (a < ((bits64) 1) << 32) {
203 shiftCount += 32;
204 } else {
205 a >>= 32;
207 shiftCount += countLeadingZeros32(a);
208 return shiftCount;
212 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
214 int8 shiftCount;
216 shiftCount = countLeadingZeros64(zSig) - 1;
217 return roundAndPackFloat64(zSign, zExp - shiftCount,
218 zSig << shiftCount);
222 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
224 int16 aExp, bExp, zExp;
225 bits64 aSig, bSig, zSig;
226 int16 expDiff;
228 aSig = extractFloat64Frac(a);
229 aExp = extractFloat64Exp(a);
230 bSig = extractFloat64Frac(b);
231 bExp = extractFloat64Exp(b);
232 expDiff = aExp - bExp;
233 aSig <<= 10;
234 bSig <<= 10;
235 if (0 < expDiff)
236 goto aExpBigger;
237 if (expDiff < 0)
238 goto bExpBigger;
239 if (aExp == 0) {
240 aExp = 1;
241 bExp = 1;
243 if (bSig < aSig)
244 goto aBigger;
245 if (aSig < bSig)
246 goto bBigger;
247 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
248 bExpBigger:
249 if (bExp == 0x7FF) {
250 return packFloat64(zSign ^ 1, 0x7FF, 0);
252 if (aExp == 0) {
253 ++expDiff;
254 } else {
255 aSig |= LIT64(0x4000000000000000);
257 shift64RightJamming(aSig, -expDiff, &aSig);
258 bSig |= LIT64(0x4000000000000000);
259 bBigger:
260 zSig = bSig - aSig;
261 zExp = bExp;
262 zSign ^= 1;
263 goto normalizeRoundAndPack;
264 aExpBigger:
265 if (aExp == 0x7FF) {
266 return a;
268 if (bExp == 0) {
269 --expDiff;
270 } else {
271 bSig |= LIT64(0x4000000000000000);
273 shift64RightJamming(bSig, expDiff, &bSig);
274 aSig |= LIT64(0x4000000000000000);
275 aBigger:
276 zSig = aSig - bSig;
277 zExp = aExp;
278 normalizeRoundAndPack:
279 --zExp;
280 return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
283 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
285 int16 aExp, bExp, zExp;
286 bits64 aSig, bSig, zSig;
287 int16 expDiff;
289 aSig = extractFloat64Frac(a);
290 aExp = extractFloat64Exp(a);
291 bSig = extractFloat64Frac(b);
292 bExp = extractFloat64Exp(b);
293 expDiff = aExp - bExp;
294 aSig <<= 9;
295 bSig <<= 9;
296 if (0 < expDiff) {
297 if (aExp == 0x7FF) {
298 return a;
300 if (bExp == 0) {
301 --expDiff;
302 } else {
303 bSig |= LIT64(0x2000000000000000);
305 shift64RightJamming(bSig, expDiff, &bSig);
306 zExp = aExp;
307 } else if (expDiff < 0) {
308 if (bExp == 0x7FF) {
309 return packFloat64(zSign, 0x7FF, 0);
311 if (aExp == 0) {
312 ++expDiff;
313 } else {
314 aSig |= LIT64(0x2000000000000000);
316 shift64RightJamming(aSig, -expDiff, &aSig);
317 zExp = bExp;
318 } else {
319 if (aExp == 0x7FF) {
320 return a;
322 if (aExp == 0)
323 return packFloat64(zSign, 0, (aSig + bSig) >> 9);
324 zSig = LIT64(0x4000000000000000) + aSig + bSig;
325 zExp = aExp;
326 goto roundAndPack;
328 aSig |= LIT64(0x2000000000000000);
329 zSig = (aSig + bSig) << 1;
330 --zExp;
331 if ((sbits64) zSig < 0) {
332 zSig = aSig + bSig;
333 ++zExp;
335 roundAndPack:
336 return roundAndPackFloat64(zSign, zExp, zSig);
340 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
342 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
345 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
347 bits32 z;
348 if (count == 0) {
349 z = a;
350 } else if (count < 32) {
351 z = (a >> count) | ((a << ((-count) & 31)) != 0);
352 } else {
353 z = (a != 0);
355 *zPtr = z;
358 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
360 flag roundNearestEven;
361 int8 roundIncrement, roundBits;
362 flag isTiny;
364 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
365 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
366 roundIncrement = 0x40;
367 if (!roundNearestEven) {
368 roundIncrement = 0;
370 roundBits = zSig & 0x7F;
371 if (0xFD <= (bits16) zExp) {
372 if ((0xFD < zExp)
373 || ((zExp == 0xFD)
374 && ((sbits32) (zSig + roundIncrement) < 0))
376 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
377 return packFloat32(zSign, 0xFF,
378 0) - (roundIncrement == 0);
380 if (zExp < 0) {
381 isTiny = (zExp < -1)
382 || (zSig + roundIncrement < 0x80000000);
383 shift32RightJamming(zSig, -zExp, &zSig);
384 zExp = 0;
385 roundBits = zSig & 0x7F;
386 if (isTiny && roundBits)
387 float_raise(FPSCR_CAUSE_UNDERFLOW);
390 if (roundBits)
391 float_raise(FPSCR_CAUSE_INEXACT);
392 zSig = (zSig + roundIncrement) >> 7;
393 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
394 if (zSig == 0)
395 zExp = 0;
396 return packFloat32(zSign, zExp, zSig);
400 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
402 int8 shiftCount;
404 shiftCount = countLeadingZeros32(zSig) - 1;
405 return roundAndPackFloat32(zSign, zExp - shiftCount,
406 zSig << shiftCount);
409 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
411 flag roundNearestEven;
412 int16 roundIncrement, roundBits;
413 flag isTiny;
415 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
416 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
417 roundIncrement = 0x200;
418 if (!roundNearestEven) {
419 roundIncrement = 0;
421 roundBits = zSig & 0x3FF;
422 if (0x7FD <= (bits16) zExp) {
423 if ((0x7FD < zExp)
424 || ((zExp == 0x7FD)
425 && ((sbits64) (zSig + roundIncrement) < 0))
427 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
428 return packFloat64(zSign, 0x7FF,
429 0) - (roundIncrement == 0);
431 if (zExp < 0) {
432 isTiny = (zExp < -1)
433 || (zSig + roundIncrement <
434 LIT64(0x8000000000000000));
435 shift64RightJamming(zSig, -zExp, &zSig);
436 zExp = 0;
437 roundBits = zSig & 0x3FF;
438 if (isTiny && roundBits)
439 float_raise(FPSCR_CAUSE_UNDERFLOW);
442 if (roundBits)
443 float_raise(FPSCR_CAUSE_INEXACT);
444 zSig = (zSig + roundIncrement) >> 10;
445 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
446 if (zSig == 0)
447 zExp = 0;
448 return packFloat64(zSign, zExp, zSig);
452 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
454 int16 aExp, bExp, zExp;
455 bits32 aSig, bSig, zSig;
456 int16 expDiff;
458 aSig = extractFloat32Frac(a);
459 aExp = extractFloat32Exp(a);
460 bSig = extractFloat32Frac(b);
461 bExp = extractFloat32Exp(b);
462 expDiff = aExp - bExp;
463 aSig <<= 7;
464 bSig <<= 7;
465 if (0 < expDiff)
466 goto aExpBigger;
467 if (expDiff < 0)
468 goto bExpBigger;
469 if (aExp == 0) {
470 aExp = 1;
471 bExp = 1;
473 if (bSig < aSig)
474 goto aBigger;
475 if (aSig < bSig)
476 goto bBigger;
477 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
478 bExpBigger:
479 if (bExp == 0xFF) {
480 return packFloat32(zSign ^ 1, 0xFF, 0);
482 if (aExp == 0) {
483 ++expDiff;
484 } else {
485 aSig |= 0x40000000;
487 shift32RightJamming(aSig, -expDiff, &aSig);
488 bSig |= 0x40000000;
489 bBigger:
490 zSig = bSig - aSig;
491 zExp = bExp;
492 zSign ^= 1;
493 goto normalizeRoundAndPack;
494 aExpBigger:
495 if (aExp == 0xFF) {
496 return a;
498 if (bExp == 0) {
499 --expDiff;
500 } else {
501 bSig |= 0x40000000;
503 shift32RightJamming(bSig, expDiff, &bSig);
504 aSig |= 0x40000000;
505 aBigger:
506 zSig = aSig - bSig;
507 zExp = aExp;
508 normalizeRoundAndPack:
509 --zExp;
510 return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
514 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
516 int16 aExp, bExp, zExp;
517 bits32 aSig, bSig, zSig;
518 int16 expDiff;
520 aSig = extractFloat32Frac(a);
521 aExp = extractFloat32Exp(a);
522 bSig = extractFloat32Frac(b);
523 bExp = extractFloat32Exp(b);
524 expDiff = aExp - bExp;
525 aSig <<= 6;
526 bSig <<= 6;
527 if (0 < expDiff) {
528 if (aExp == 0xFF) {
529 return a;
531 if (bExp == 0) {
532 --expDiff;
533 } else {
534 bSig |= 0x20000000;
536 shift32RightJamming(bSig, expDiff, &bSig);
537 zExp = aExp;
538 } else if (expDiff < 0) {
539 if (bExp == 0xFF) {
540 return packFloat32(zSign, 0xFF, 0);
542 if (aExp == 0) {
543 ++expDiff;
544 } else {
545 aSig |= 0x20000000;
547 shift32RightJamming(aSig, -expDiff, &aSig);
548 zExp = bExp;
549 } else {
550 if (aExp == 0xFF) {
551 return a;
553 if (aExp == 0)
554 return packFloat32(zSign, 0, (aSig + bSig) >> 6);
555 zSig = 0x40000000 + aSig + bSig;
556 zExp = aExp;
557 goto roundAndPack;
559 aSig |= 0x20000000;
560 zSig = (aSig + bSig) << 1;
561 --zExp;
562 if ((sbits32) zSig < 0) {
563 zSig = aSig + bSig;
564 ++zExp;
566 roundAndPack:
567 return roundAndPackFloat32(zSign, zExp, zSig);
571 float64 float64_sub(float64 a, float64 b)
573 flag aSign, bSign;
575 aSign = extractFloat64Sign(a);
576 bSign = extractFloat64Sign(b);
577 if (aSign == bSign) {
578 return subFloat64Sigs(a, b, aSign);
579 } else {
580 return addFloat64Sigs(a, b, aSign);
585 float32 float32_sub(float32 a, float32 b)
587 flag aSign, bSign;
589 aSign = extractFloat32Sign(a);
590 bSign = extractFloat32Sign(b);
591 if (aSign == bSign) {
592 return subFloat32Sigs(a, b, aSign);
593 } else {
594 return addFloat32Sigs(a, b, aSign);
599 float32 float32_add(float32 a, float32 b)
601 flag aSign, bSign;
603 aSign = extractFloat32Sign(a);
604 bSign = extractFloat32Sign(b);
605 if (aSign == bSign) {
606 return addFloat32Sigs(a, b, aSign);
607 } else {
608 return subFloat32Sigs(a, b, aSign);
613 float64 float64_add(float64 a, float64 b)
615 flag aSign, bSign;
617 aSign = extractFloat64Sign(a);
618 bSign = extractFloat64Sign(b);
619 if (aSign == bSign) {
620 return addFloat64Sigs(a, b, aSign);
621 } else {
622 return subFloat64Sigs(a, b, aSign);
626 static void
627 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
629 int8 shiftCount;
631 shiftCount = countLeadingZeros64(aSig) - 11;
632 *zSigPtr = aSig << shiftCount;
633 *zExpPtr = 1 - shiftCount;
636 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
637 bits64 * z1Ptr)
639 bits64 z1;
641 z1 = a1 + b1;
642 *z1Ptr = z1;
643 *z0Ptr = a0 + b0 + (z1 < a1);
646 inline void
647 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
648 bits64 * z1Ptr)
650 *z1Ptr = a1 - b1;
651 *z0Ptr = a0 - b0 - (a1 < b1);
654 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
656 bits64 b0, b1;
657 bits64 rem0, rem1, term0, term1;
658 bits64 z;
659 if (b <= a0)
660 return LIT64(0xFFFFFFFFFFFFFFFF);
661 b0 = b >> 32;
662 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
663 mul64To128(b, z, &term0, &term1);
664 sub128(a0, a1, term0, term1, &rem0, &rem1);
665 while (((sbits64) rem0) < 0) {
666 z -= LIT64(0x100000000);
667 b1 = b << 32;
668 add128(rem0, rem1, b0, b1, &rem0, &rem1);
670 rem0 = (rem0 << 32) | (rem1 >> 32);
671 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
672 return z;
675 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
677 bits32 aHigh, aLow, bHigh, bLow;
678 bits64 z0, zMiddleA, zMiddleB, z1;
680 aLow = a;
681 aHigh = a >> 32;
682 bLow = b;
683 bHigh = b >> 32;
684 z1 = ((bits64) aLow) * bLow;
685 zMiddleA = ((bits64) aLow) * bHigh;
686 zMiddleB = ((bits64) aHigh) * bLow;
687 z0 = ((bits64) aHigh) * bHigh;
688 zMiddleA += zMiddleB;
689 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
690 zMiddleA <<= 32;
691 z1 += zMiddleA;
692 z0 += (z1 < zMiddleA);
693 *z1Ptr = z1;
694 *z0Ptr = z0;
698 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
699 bits32 * zSigPtr)
701 int8 shiftCount;
703 shiftCount = countLeadingZeros32(aSig) - 8;
704 *zSigPtr = aSig << shiftCount;
705 *zExpPtr = 1 - shiftCount;
709 float64 float64_div(float64 a, float64 b)
711 flag aSign, bSign, zSign;
712 int16 aExp, bExp, zExp;
713 bits64 aSig, bSig, zSig;
714 bits64 rem0, rem1;
715 bits64 term0, term1;
717 aSig = extractFloat64Frac(a);
718 aExp = extractFloat64Exp(a);
719 aSign = extractFloat64Sign(a);
720 bSig = extractFloat64Frac(b);
721 bExp = extractFloat64Exp(b);
722 bSign = extractFloat64Sign(b);
723 zSign = aSign ^ bSign;
724 if (aExp == 0x7FF) {
725 if (bExp == 0x7FF) {
727 return packFloat64(zSign, 0x7FF, 0);
729 if (bExp == 0x7FF) {
730 return packFloat64(zSign, 0, 0);
732 if (bExp == 0) {
733 if (bSig == 0) {
734 if ((aExp | aSig) == 0) {
735 float_raise(FPSCR_CAUSE_INVALID);
737 return packFloat64(zSign, 0x7FF, 0);
739 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
741 if (aExp == 0) {
742 if (aSig == 0)
743 return packFloat64(zSign, 0, 0);
744 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
746 zExp = aExp - bExp + 0x3FD;
747 aSig = (aSig | LIT64(0x0010000000000000)) << 10;
748 bSig = (bSig | LIT64(0x0010000000000000)) << 11;
749 if (bSig <= (aSig + aSig)) {
750 aSig >>= 1;
751 ++zExp;
753 zSig = estimateDiv128To64(aSig, 0, bSig);
754 if ((zSig & 0x1FF) <= 2) {
755 mul64To128(bSig, zSig, &term0, &term1);
756 sub128(aSig, 0, term0, term1, &rem0, &rem1);
757 while ((sbits64) rem0 < 0) {
758 --zSig;
759 add128(rem0, rem1, 0, bSig, &rem0, &rem1);
761 zSig |= (rem1 != 0);
763 return roundAndPackFloat64(zSign, zExp, zSig);
767 float32 float32_div(float32 a, float32 b)
769 flag aSign, bSign, zSign;
770 int16 aExp, bExp, zExp;
771 bits32 aSig, bSig, zSig;
773 aSig = extractFloat32Frac(a);
774 aExp = extractFloat32Exp(a);
775 aSign = extractFloat32Sign(a);
776 bSig = extractFloat32Frac(b);
777 bExp = extractFloat32Exp(b);
778 bSign = extractFloat32Sign(b);
779 zSign = aSign ^ bSign;
780 if (aExp == 0xFF) {
781 if (bExp == 0xFF) {
783 return packFloat32(zSign, 0xFF, 0);
785 if (bExp == 0xFF) {
786 return packFloat32(zSign, 0, 0);
788 if (bExp == 0) {
789 if (bSig == 0) {
790 return packFloat32(zSign, 0xFF, 0);
792 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
794 if (aExp == 0) {
795 if (aSig == 0)
796 return packFloat32(zSign, 0, 0);
797 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
799 zExp = aExp - bExp + 0x7D;
800 aSig = (aSig | 0x00800000) << 7;
801 bSig = (bSig | 0x00800000) << 8;
802 if (bSig <= (aSig + aSig)) {
803 aSig >>= 1;
804 ++zExp;
806 zSig = (((bits64) aSig) << 32) / bSig;
807 if ((zSig & 0x3F) == 0) {
808 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
810 return roundAndPackFloat32(zSign, zExp, zSig);
814 float32 float32_mul(float32 a, float32 b)
816 char aSign, bSign, zSign;
817 int aExp, bExp, zExp;
818 unsigned int aSig, bSig;
819 unsigned long long zSig64;
820 unsigned int zSig;
822 aSig = extractFloat32Frac(a);
823 aExp = extractFloat32Exp(a);
824 aSign = extractFloat32Sign(a);
825 bSig = extractFloat32Frac(b);
826 bExp = extractFloat32Exp(b);
827 bSign = extractFloat32Sign(b);
828 zSign = aSign ^ bSign;
829 if (aExp == 0) {
830 if (aSig == 0)
831 return packFloat32(zSign, 0, 0);
832 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
834 if (bExp == 0) {
835 if (bSig == 0)
836 return packFloat32(zSign, 0, 0);
837 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
839 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
840 return roundAndPackFloat32(zSign, 0xff, 0);
842 zExp = aExp + bExp - 0x7F;
843 aSig = (aSig | 0x00800000) << 7;
844 bSig = (bSig | 0x00800000) << 8;
845 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
846 zSig = zSig64;
847 if (0 <= (signed int)(zSig << 1)) {
848 zSig <<= 1;
849 --zExp;
851 return roundAndPackFloat32(zSign, zExp, zSig);
855 float64 float64_mul(float64 a, float64 b)
857 char aSign, bSign, zSign;
858 int aExp, bExp, zExp;
859 unsigned long long int aSig, bSig, zSig0, zSig1;
861 aSig = extractFloat64Frac(a);
862 aExp = extractFloat64Exp(a);
863 aSign = extractFloat64Sign(a);
864 bSig = extractFloat64Frac(b);
865 bExp = extractFloat64Exp(b);
866 bSign = extractFloat64Sign(b);
867 zSign = aSign ^ bSign;
869 if (aExp == 0) {
870 if (aSig == 0)
871 return packFloat64(zSign, 0, 0);
872 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
874 if (bExp == 0) {
875 if (bSig == 0)
876 return packFloat64(zSign, 0, 0);
877 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
879 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
880 return roundAndPackFloat64(zSign, 0x7ff, 0);
882 zExp = aExp + bExp - 0x3FF;
883 aSig = (aSig | 0x0010000000000000LL) << 10;
884 bSig = (bSig | 0x0010000000000000LL) << 11;
885 mul64To128(aSig, bSig, &zSig0, &zSig1);
886 zSig0 |= (zSig1 != 0);
887 if (0 <= (signed long long int)(zSig0 << 1)) {
888 zSig0 <<= 1;
889 --zExp;
891 return roundAndPackFloat64(zSign, zExp, zSig0);