/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
   David Mosberger-Tang.

   You may copy, modify, and redistribute this file under the terms of
   the GNU General Public License, version 2, or any later version, at
   your convenience. */
/* Note:

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae.  */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H
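/* For orientation, a sketch of the types this file operates on.  They
   are declared in fp_emu.h, which is authoritative; the layout shown
   here is an assumption for illustration, with m32[0] as the most
   significant longword and lowmant holding eight extra guard bits:

        union fp_mant64  { unsigned long long m64;    unsigned long m32[2]; };
        union fp_mant128 { unsigned long long m64[2]; unsigned long m32[4]; };

        struct fp_ext {
                unsigned char   lowmant;
                unsigned char   sign;
                unsigned short  exp;
                union fp_mant64 mant;
        };
*/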
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
        reg->exp += cnt;

        switch (cnt) {
        case 0 ... 8:
                reg->lowmant = reg->mant.m32[1] << (8 - cnt);
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 9 ... 32:
                reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
                                   (reg->mant.m32[0] << (32 - cnt));
                reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
                break;
        case 33 ... 39:
                asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
                        : "m" (reg->mant.m32[0]), "d" (64 - cnt));
                if (reg->mant.m32[1] << (40 - cnt))
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        case 40 ... 71:
                reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
                if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
                        reg->lowmant |= 1;
                reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
                reg->mant.m32[0] = 0;
                break;
        default:
                reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
                reg->mant.m32[0] = 0;
                reg->mant.m32[1] = 0;
                break;
        }
}
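/* In fp_denormalize above, lowmant collects the eight bits shifted out
   just below the mantissa, and any lower bit lost to the shift is ORed
   in as a sticky bit so that later rounding still sees an inexact
   result.  For example, with cnt in 9..32 the byte directly below the
   new mantissa is m32[1] >> (cnt - 8), and m32[1] << (40 - cnt) tests
   whether anything nonzero fell off below that byte. */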
static inline int fp_overnormalize(struct fp_ext *reg)
{
        int shift;

        if (reg->mant.m32[0]) {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
                reg->mant.m32[0] = (reg->mant.m32[0] << shift) |
                                   (reg->mant.m32[1] >> (32 - shift));
                reg->mant.m32[1] = (reg->mant.m32[1] << shift);
        } else {
                asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
                reg->mant.m32[0] = (reg->mant.m32[1] << shift);
                reg->mant.m32[1] = 0;
                shift += 32;
        }

        return shift;
}
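/* bfffo is the m68k "bit field find first one" instruction; used as
   above it yields the number of leading zero bits in the operand, so
   shift is exactly the distance needed to renormalize (the portable
   equivalent would be a count-leading-zeros such as __builtin_clz). */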
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
        int carry;

        /* we assume here that gcc inserts only a move and a clr instruction */
        asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src->lowmant), "0,0" (dest->lowmant));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
        asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
        asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

        return carry;
}
static inline int fp_addcarry(struct fp_ext *reg)
{
        if (++reg->exp == 0x7fff) {
                if (reg->mant.m64)
                        fp_set_sr(FPSR_EXC_INEX2);
                reg->mant.m64 = 0;
                fp_set_sr(FPSR_EXC_OVFL);
                return 0;
        }
        reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
        reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
                           (reg->mant.m32[0] << 31);
        reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

        return 1;
}
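/* fp_addcarry above handles a carry out of the top mantissa bit: the
   exponent is bumped and the whole mantissa is shifted right one place,
   with the old lowmant collapsed into a sticky bit.  If the exponent
   reaches 0x7fff the value has overflowed past the largest biased
   exponent, so the mantissa is cleared and the overflow (and, if bits
   were lost, inexact) exceptions are raised instead. */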
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
                              struct fp_ext *src2)
{
        /* we assume here that gcc inserts only a move and a clr instruction */
        asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
                : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
                : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
        asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
                : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}
#define fp_mul64(desth, destl, src1, src2) ({                           \
        asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)             \
                : "dm" (src1), "0" (src2));                             \
})
#define fp_div64(quot, rem, srch, srcl, div)                            \
        asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)                \
                : "dm" (div), "1" (srch), "0" (srcl))
#define fp_add64(dest1, dest2, src1, src2) ({                           \
        asm ("add.l %1,%0" : "=d,dm" (dest2)                            \
                : "dm,d" (src2), "0,0" (dest2));                        \
        asm ("addx.l %1,%0" : "=d" (dest1)                              \
                : "d" (src1), "0" (dest1));                             \
})
#define fp_addx96(dest, src) ({                                         \
        /* we assume here that gcc inserts only a move and a clr instruction */ \
        asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])             \
                : "g,d" (src.m32[1]), "0,0" (dest->m32[2]));            \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])              \
                : "d" (src.m32[0]), "0" (dest->m32[1]));                \
        asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])              \
                : "d" (0), "0" (dest->m32[0]));                         \
})
#define fp_sub64(dest, src) ({                                          \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])                      \
                : "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));            \
        asm ("subx.l %1,%0" : "=d" (dest.m32[0])                        \
                : "d" (src.m32[0]), "0" (dest.m32[0]));                 \
})
#define fp_sub96c(dest, srch, srcm, srcl) ({                            \
        char carry;                                                     \
        asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])                      \
                : "dm,d" (srcl), "0,0" (dest.m32[2]));                  \
        asm ("subx.l %1,%0" : "=d" (dest.m32[1])                        \
                : "d" (srcm), "0" (dest.m32[1]));                       \
        asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])  \
                : "d" (srch), "1" (dest.m32[0]));                       \
        carry;                                                          \
})
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
                                   struct fp_ext *src2)
{
        union fp_mant64 temp;

        fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
        fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
        fp_addx96(dest, temp);

        fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
        fp_addx96(dest, temp);
}
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
                                 struct fp_ext *div)
{
        union fp_mant128 tmp;
        union fp_mant64 tmp64;
        unsigned long *mantp = dest->m32;
        unsigned long fix, rem, first, dummy;
        int i;

        /* the algorithm below requires dest to be smaller than div,
           but both have the high bit set */
        if (src->mant.m64 >= div->mant.m64) {
                fp_sub64(src->mant, div->mant);
                *mantp = 1;
        } else
                *mantp = 0;
        mantp++;

        /* The basic idea behind this algorithm: we can't divide two 64-bit
           numbers (AB/CD) directly, but we can calculate AB/C0.  That
           quotient is off by a factor of C0/CD, so we multiply the first
           result by that factor to fix it; after that the result is nearly
           correct and only a few corrections are needed.  (A portable
           sketch of this estimate-and-correct step follows the function.) */

        /* C0/CD could be precalculated, but that is a 64-bit division
           again.  We can make it easier by dividing through C first, giving
           10/1D, so that after a single shift the value fits into 32 bits. */
        fix = 0x80000000;
        dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
        dummy = (dummy >> 1) | fix;
        fp_div64(fix, dummy, fix, 0, dummy);
        fix--;

        for (i = 0; i < 3; i++, mantp++) {
                if (src->mant.m32[0] == div->mant.m32[0]) {
                        fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                        *mantp += fix;
                } else {
                        fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1],
                                 div->mant.m32[0]);

                        fp_mul64(*mantp, dummy, first, fix);
                }

                fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
                fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
                tmp.m32[2] = 0;

                fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
                fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

                src->mant.m32[0] = tmp.m32[1];
                src->mant.m32[1] = tmp.m32[2];

                while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
                        src->mant.m32[0] = tmp.m32[1];
                        src->mant.m32[1] = tmp.m32[2];
                        *mantp += 1;
                }
        }
}
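/* Illustration only, compiled out: the estimate-and-fix step above in
   portable C, with a 64-bit type standing in for the fp_mul64/fp_div64
   instruction pairs.  The helper name and standalone form are
   hypothetical; it assumes 32-bit longwords, d normalized (top bit
   set), and n's high longword strictly below d's.  It returns an
   estimate of floor(n * 2^32 / d) that may come out slightly low, which
   the fp_sub96c correction loop above makes up for. */
#if 0
static unsigned int fp_quotient_estimate(unsigned long long n,
                                         unsigned long long d)
{
        unsigned int dhi = d >> 32;             /* "C" of CD */
        unsigned int dlo = d & 0xffffffffu;     /* "D" of CD */
        unsigned long long dummy, fix, first;

        /* dummy ~= CD/(2*C) in 1.31 fixed point, cf. "10/1D" above */
        dummy = ((unsigned long long)(dlo / dhi + 1) >> 1) | 0x80000000u;
        /* fix ~= C0/CD in 0.32 fixed point: 2^63/dummy, minus one to
           stay on the low side */
        fix = ((1ULL << 63) / dummy) - 1;
        /* first = floor(n / C): the one 64/32 divide we can do */
        first = n / dhi;
        /* scale by C0/CD; this is the high half of the fp_mul64 */
        return (unsigned int)((first * fix) >> 32);
}
#endif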
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
                                 int shift)
{
        unsigned long tmp;

        switch (shift) {
        case 0:
                dest->mant.m64 = src->m64[0];
                dest->lowmant = src->m32[2] >> 24;
                if (src->m32[3] || (src->m32[2] << 8))
                        dest->lowmant |= 1;
                break;
        case 1:
                asm volatile ("lsl.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[2]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
                asm volatile ("roxl.l #1,%0"
                        : "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] || (tmp << 8))
                        dest->lowmant |= 1;
                break;
        case 31:
                asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
                        : "=d" (dest->mant.m32[0])
                        : "d" (src->m32[0]), "0" (src->m32[1]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
                asm volatile ("roxr.l #1,%0"
                        : "=d" (tmp) : "0" (src->m32[3]));
                dest->lowmant = tmp >> 24;
                if (src->m32[3] << 7)
                        dest->lowmant |= 1;
                break;
        case 32:
                dest->mant.m32[0] = src->m32[1];
                dest->mant.m32[1] = src->m32[2];
                dest->lowmant = src->m32[3] >> 24;
                if (src->m32[3] << 8)
                        dest->lowmant |= 1;
                break;
        }
}
#endif /* MULTI_ARITH_H */