2 * Copyright (C) 2005 Josef Cejka
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup softfloat
38 #include<comparison.h>
43 float32
divFloat32(float32 a
, float32 b
)
46 int32_t aexp
, bexp
, cexp
;
47 uint64_t afrac
, bfrac
, cfrac
;
49 result
.parts
.sign
= a
.parts
.sign
^ b
.parts
.sign
;
51 if (isFloat32NaN(a
)) {
52 if (isFloat32SigNaN(a
)) {
59 if (isFloat32NaN(b
)) {
60 if (isFloat32SigNaN(b
)) {
67 if (isFloat32Infinity(a
)) {
68 if (isFloat32Infinity(b
)) {
70 result
.binary
= FLOAT32_NAN
;
74 result
.parts
.exp
= a
.parts
.exp
;
75 result
.parts
.fraction
= a
.parts
.fraction
;
79 if (isFloat32Infinity(b
)) {
80 if (isFloat32Zero(a
)) {
83 result
.parts
.fraction
= 0;
88 result
.parts
.fraction
= 0;
92 if (isFloat32Zero(b
)) {
93 if (isFloat32Zero(a
)) {
95 result
.binary
= FLOAT32_NAN
;
98 /* FIXME: division by zero */
100 result
.parts
.fraction
= 0;
105 afrac
= a
.parts
.fraction
;
107 bfrac
= b
.parts
.fraction
;
110 /* denormalized numbers */
113 result
.parts
.exp
= 0;
114 result
.parts
.fraction
= 0;
120 /* afrac is nonzero => it must stop */
121 while (! (afrac
& FLOAT32_HIDDEN_BIT_MASK
) ) {
129 /* bfrac is nonzero => it must stop */
130 while (! (bfrac
& FLOAT32_HIDDEN_BIT_MASK
) ) {
136 afrac
= (afrac
| FLOAT32_HIDDEN_BIT_MASK
) << (32 - FLOAT32_FRACTION_SIZE
- 1 );
137 bfrac
= (bfrac
| FLOAT32_HIDDEN_BIT_MASK
) << (32 - FLOAT32_FRACTION_SIZE
);
139 if ( bfrac
<= (afrac
<< 1) ) {
144 cexp
= aexp
- bexp
+ FLOAT32_BIAS
- 2;
146 cfrac
= (afrac
<< 32) / bfrac
;
147 if (( cfrac
& 0x3F ) == 0) {
148 cfrac
|= ( bfrac
* cfrac
!= afrac
<< 32 );
153 /* find first nonzero digit and shift result and detect possibly underflow */
154 while ((cexp
> 0) && (cfrac
) && (!(cfrac
& (FLOAT32_HIDDEN_BIT_MASK
<< 7 )))) {
157 /* TODO: fix underflow */
160 cfrac
+= (0x1 << 6); /* FIXME: 7 is not sure*/
162 if (cfrac
& (FLOAT32_HIDDEN_BIT_MASK
<< 7)) {
168 if (cexp
>= FLOAT32_MAX_EXPONENT
) {
169 /* FIXME: overflow, return infinity */
170 result
.parts
.exp
= FLOAT32_MAX_EXPONENT
;
171 result
.parts
.fraction
= 0;
176 /* FIXME: underflow */
177 result
.parts
.exp
= 0;
178 if ((cexp
+ FLOAT32_FRACTION_SIZE
) < 0) {
179 result
.parts
.fraction
= 0;
189 result
.parts
.exp
= (uint32_t)cexp
;
192 result
.parts
.fraction
= ((cfrac
>> 6) & (~FLOAT32_HIDDEN_BIT_MASK
));
197 float64
divFloat64(float64 a
, float64 b
)
200 int64_t aexp
, bexp
, cexp
;
201 uint64_t afrac
, bfrac
, cfrac
;
202 uint64_t remlo
, remhi
;
204 result
.parts
.sign
= a
.parts
.sign
^ b
.parts
.sign
;
206 if (isFloat64NaN(a
)) {
208 if (isFloat64SigNaN(b
)) {
213 if (isFloat64SigNaN(a
)) {
220 if (isFloat64NaN(b
)) {
221 if (isFloat64SigNaN(b
)) {
228 if (isFloat64Infinity(a
)) {
229 if (isFloat64Infinity(b
) || isFloat64Zero(b
)) {
230 /*FIXME: inf / inf */
231 result
.binary
= FLOAT64_NAN
;
235 result
.parts
.exp
= a
.parts
.exp
;
236 result
.parts
.fraction
= a
.parts
.fraction
;
240 if (isFloat64Infinity(b
)) {
241 if (isFloat64Zero(a
)) {
243 result
.parts
.exp
= 0;
244 result
.parts
.fraction
= 0;
247 /* FIXME: num / inf*/
248 result
.parts
.exp
= 0;
249 result
.parts
.fraction
= 0;
253 if (isFloat64Zero(b
)) {
254 if (isFloat64Zero(a
)) {
256 result
.binary
= FLOAT64_NAN
;
259 /* FIXME: division by zero */
260 result
.parts
.exp
= 0;
261 result
.parts
.fraction
= 0;
266 afrac
= a
.parts
.fraction
;
268 bfrac
= b
.parts
.fraction
;
271 /* denormalized numbers */
274 result
.parts
.exp
= 0;
275 result
.parts
.fraction
= 0;
281 /* afrac is nonzero => it must stop */
282 while (! (afrac
& FLOAT64_HIDDEN_BIT_MASK
) ) {
290 /* bfrac is nonzero => it must stop */
291 while (! (bfrac
& FLOAT64_HIDDEN_BIT_MASK
) ) {
297 afrac
= (afrac
| FLOAT64_HIDDEN_BIT_MASK
) << (64 - FLOAT64_FRACTION_SIZE
- 2 );
298 bfrac
= (bfrac
| FLOAT64_HIDDEN_BIT_MASK
) << (64 - FLOAT64_FRACTION_SIZE
- 1);
300 if ( bfrac
<= (afrac
<< 1) ) {
305 cexp
= aexp
- bexp
+ FLOAT64_BIAS
- 2;
307 cfrac
= divFloat64estim(afrac
, bfrac
);
309 if (( cfrac
& 0x1FF ) <= 2) { /*FIXME:?? */
310 mul64integers( bfrac
, cfrac
, &remlo
, &remhi
);
311 /* (__u128)afrac << 64 - ( ((__u128)remhi<<64) + (__u128)remlo )*/
312 remhi
= afrac
- remhi
- ( remlo
> 0);
315 while ((int64_t) remhi
< 0) {
318 remhi
+= ( remlo
< bfrac
);
320 cfrac
|= ( remlo
!= 0 );
323 /* round and shift */
324 result
= finishFloat64(cexp
, cfrac
, result
.parts
.sign
);
329 uint64_t divFloat64estim(uint64_t a
, uint64_t b
)
332 uint64_t remhi
, remlo
;
336 return 0xFFFFFFFFFFFFFFFFull
;
340 result
= ((bhi
<< 32) <= a
) ?( 0xFFFFFFFFull
<< 32) : ( a
/ bhi
) << 32;
341 mul64integers(b
, result
, &remlo
, &remhi
);
343 remhi
= a
- remhi
- (remlo
> 0);
347 while ( (int64_t) remhi
< 0 ) {
348 result
-= 0x1ll
<< 32;
350 remhi
+= bhi
+ ( remlo
< b
);
352 remhi
= (remhi
<< 32) | (remlo
>> 32);
353 if (( bhi
<< 32) <= remhi
) {
354 result
|= 0xFFFFFFFF;
356 result
|= remhi
/ bhi
;