2 * Copyright (c) 2005 Josef Cejka
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup softfloat
37 #include<comparison.h>
39 /** Subtract two float32 numbers with same signs
41 float32
subFloat32(float32 a
, float32 b
)
44 uint32_t exp1
, exp2
, frac1
, frac2
;
49 expdiff
= a
.parts
.exp
- b
.parts
.exp
;
50 if ((expdiff
< 0 ) || ((expdiff
== 0) && (a
.parts
.fraction
< b
.parts
.fraction
))) {
51 if (isFloat32NaN(b
)) {
52 /* TODO: fix SigNaN */
53 if (isFloat32SigNaN(b
)) {
58 if (b
.parts
.exp
== FLOAT32_MAX_EXPONENT
) {
59 b
.parts
.sign
= !b
.parts
.sign
; /* num -(+-inf) = -+inf */
63 result
.parts
.sign
= !a
.parts
.sign
;
65 frac1
= b
.parts
.fraction
;
67 frac2
= a
.parts
.fraction
;
71 if (isFloat32NaN(a
)) {
72 /* TODO: fix SigNaN */
73 if (isFloat32SigNaN(a
) || isFloat32SigNaN(b
)) {
78 if (a
.parts
.exp
== FLOAT32_MAX_EXPONENT
) {
79 if (b
.parts
.exp
== FLOAT32_MAX_EXPONENT
) {
80 /* inf - inf => nan */
81 /* TODO: fix exception */
82 result
.binary
= FLOAT32_NAN
;
88 result
.parts
.sign
= a
.parts
.sign
;
90 frac1
= a
.parts
.fraction
;
92 frac2
= b
.parts
.fraction
;
97 /* both are denormalized */
98 result
.parts
.fraction
= frac1
-frac2
;
99 if (result
.parts
.fraction
> frac1
) {
100 /* TODO: underflow exception */
103 result
.parts
.exp
= 0;
108 frac1
|= FLOAT32_HIDDEN_BIT_MASK
;
115 frac2
|= FLOAT32_HIDDEN_BIT_MASK
;
118 /* create some space for rounding */
122 if (expdiff
> FLOAT32_FRACTION_SIZE
+ 1) {
126 frac1
= frac1
- (frac2
>> expdiff
);
128 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
129 while ((exp1
> 0) && (!(frac1
& (FLOAT32_HIDDEN_BIT_MASK
<< 6 )))) {
132 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
135 /* rounding - if first bit after fraction is set then round up */
138 if (frac1
& (FLOAT32_HIDDEN_BIT_MASK
<< 7)) {
143 /*Clear hidden bit and shift */
144 result
.parts
.fraction
= ((frac1
>> 6) & (~FLOAT32_HIDDEN_BIT_MASK
));
145 result
.parts
.exp
= exp1
;
150 /** Subtract two float64 numbers with same signs
152 float64
subFloat64(float64 a
, float64 b
)
156 uint64_t frac1
, frac2
;
161 expdiff
= a
.parts
.exp
- b
.parts
.exp
;
162 if ((expdiff
< 0 ) || ((expdiff
== 0) && (a
.parts
.fraction
< b
.parts
.fraction
))) {
163 if (isFloat64NaN(b
)) {
164 /* TODO: fix SigNaN */
165 if (isFloat64SigNaN(b
)) {
170 if (b
.parts
.exp
== FLOAT64_MAX_EXPONENT
) {
171 b
.parts
.sign
= !b
.parts
.sign
; /* num -(+-inf) = -+inf */
175 result
.parts
.sign
= !a
.parts
.sign
;
177 frac1
= b
.parts
.fraction
;
179 frac2
= a
.parts
.fraction
;
183 if (isFloat64NaN(a
)) {
184 /* TODO: fix SigNaN */
185 if (isFloat64SigNaN(a
) || isFloat64SigNaN(b
)) {
190 if (a
.parts
.exp
== FLOAT64_MAX_EXPONENT
) {
191 if (b
.parts
.exp
== FLOAT64_MAX_EXPONENT
) {
192 /* inf - inf => nan */
193 /* TODO: fix exception */
194 result
.binary
= FLOAT64_NAN
;
200 result
.parts
.sign
= a
.parts
.sign
;
202 frac1
= a
.parts
.fraction
;
204 frac2
= b
.parts
.fraction
;
209 /* both are denormalized */
210 result
.parts
.fraction
= frac1
- frac2
;
211 if (result
.parts
.fraction
> frac1
) {
212 /* TODO: underflow exception */
215 result
.parts
.exp
= 0;
220 frac1
|= FLOAT64_HIDDEN_BIT_MASK
;
227 frac2
|= FLOAT64_HIDDEN_BIT_MASK
;
230 /* create some space for rounding */
234 if (expdiff
> FLOAT64_FRACTION_SIZE
+ 1) {
238 frac1
= frac1
- (frac2
>> expdiff
);
240 /* TODO: find first nonzero digit and shift result and detect possibly underflow */
241 while ((exp1
> 0) && (!(frac1
& (FLOAT64_HIDDEN_BIT_MASK
<< 6 )))) {
244 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */
247 /* rounding - if first bit after fraction is set then round up */
250 if (frac1
& (FLOAT64_HIDDEN_BIT_MASK
<< 7)) {
255 /*Clear hidden bit and shift */
256 result
.parts
.fraction
= ((frac1
>> 6) & (~FLOAT64_HIDDEN_BIT_MASK
));
257 result
.parts
.exp
= exp1
;