Import 2.3.18pre1
[davej-history.git] / include / math-emu / op-2.h
blobad3fbc538a0049dc77f2501b360ef8a208f33c3c
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Library General Public License as
12 published by the Free Software Foundation; either version 2 of the
13 License, or (at your option) any later version.
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Library General Public License for more details.
20 You should have received a copy of the GNU Library General Public
21 License along with the GNU C Library; see the file COPYING.LIB. If
22 not, write to the Free Software Foundation, Inc.,
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
26 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
27 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
28 #define _FP_FRAC_HIGH_2(X) (X##_f1)
29 #define _FP_FRAC_LOW_2(X) (X##_f0)
30 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
32 #define _FP_FRAC_SLL_2(X,N) \
33 do { \
34 if ((N) < _FP_W_TYPE_SIZE) \
35 { \
36 if (__builtin_constant_p(N) && (N) == 1) \
37 { \
38 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
39 X##_f0 += X##_f0; \
40 } \
41 else \
42 { \
43 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
44 X##_f0 <<= (N); \
45 } \
46 } \
47 else \
48 { \
49 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
50 X##_f0 = 0; \
51 } \
52 } while (0)
54 #define _FP_FRAC_SRL_2(X,N) \
55 do { \
56 if ((N) < _FP_W_TYPE_SIZE) \
57 { \
58 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
59 X##_f1 >>= (N); \
60 } \
61 else \
62 { \
63 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
64 X##_f1 = 0; \
65 } \
66 } while (0)
68 /* Right shift with sticky-lsb. */
69 #define _FP_FRAC_SRS_2(X,N,sz) \
70 do { \
71 if ((N) < _FP_W_TYPE_SIZE) \
72 { \
73 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
74 (__builtin_constant_p(N) && (N) == 1 \
75 ? X##_f0 & 1 \
76 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
77 X##_f1 >>= (N); \
78 } \
79 else \
80 { \
81 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
82 (((X##_f1 << (sz - (N))) | X##_f0) != 0)); \
83 X##_f1 = 0; \
84 } \
85 } while (0)
87 #define _FP_FRAC_ADDI_2(X,I) \
88 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
90 #define _FP_FRAC_ADD_2(R,X,Y) \
91 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
93 #define _FP_FRAC_SUB_2(R,X,Y) \
94 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
96 #define _FP_FRAC_DEC_2(X,Y) \
97 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
99 #define _FP_FRAC_CLZ_2(R,X) \
100 do { \
101 if (X##_f1) \
102 __FP_CLZ(R,X##_f1); \
103 else \
105 __FP_CLZ(R,X##_f0); \
106 R += _FP_W_TYPE_SIZE; \
108 } while(0)
110 /* Predicates */
111 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
112 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
113 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
114 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
115 #define _FP_FRAC_GT_2(X, Y) \
116 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 > Y##_f0)
117 #define _FP_FRAC_GE_2(X, Y) \
118 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)
120 #define _FP_ZEROFRAC_2 0, 0
121 #define _FP_MINFRAC_2 0, 1
122 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
125 * Internals
128 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
130 #define __FP_CLZ_2(R, xh, xl) \
131 do { \
132 if (xh) \
133 __FP_CLZ(R,xh); \
134 else \
136 __FP_CLZ(R,xl); \
137 R += _FP_W_TYPE_SIZE; \
139 } while(0)
141 #if 0
143 #ifndef __FP_FRAC_ADDI_2
144 #define __FP_FRAC_ADDI_2(xh, xl, i) \
145 (xh += ((xl += i) < i))
146 #endif
147 #ifndef __FP_FRAC_ADD_2
148 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
149 (rh = xh + yh + ((rl = xl + yl) < xl))
150 #endif
151 #ifndef __FP_FRAC_SUB_2
152 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
153 (rh = xh - yh - ((rl = xl - yl) > xl))
154 #endif
155 #ifndef __FP_FRAC_DEC_2
156 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
157 do { \
158 UWtype _t = xl; \
159 xh -= yh + ((xl -= yl) > _t); \
160 } while (0)
161 #endif
163 #else
165 #undef __FP_FRAC_ADDI_2
166 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
167 #undef __FP_FRAC_ADD_2
168 #define __FP_FRAC_ADD_2 add_ssaaaa
169 #undef __FP_FRAC_SUB_2
170 #define __FP_FRAC_SUB_2 sub_ddmmss
171 #undef __FP_FRAC_DEC_2
172 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
174 #endif
177 * Unpack the raw bits of a native fp value. Do not classify or
178 * normalize the data.
181 #define _FP_UNPACK_RAW_2(fs, X, val) \
182 do { \
183 union _FP_UNION_##fs _flo; _flo.flt = (val); \
185 X##_f0 = _flo.bits.frac0; \
186 X##_f1 = _flo.bits.frac1; \
187 X##_e = _flo.bits.exp; \
188 X##_s = _flo.bits.sign; \
189 } while (0)
191 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
192 do { \
193 union _FP_UNION_##fs *_flo = \
194 (union _FP_UNION_##fs *)(val); \
196 X##_f0 = _flo->bits.frac0; \
197 X##_f1 = _flo->bits.frac1; \
198 X##_e = _flo->bits.exp; \
199 X##_s = _flo->bits.sign; \
200 } while (0)
204 * Repack the raw bits of a native fp value.
207 #define _FP_PACK_RAW_2(fs, val, X) \
208 do { \
209 union _FP_UNION_##fs _flo; \
211 _flo.bits.frac0 = X##_f0; \
212 _flo.bits.frac1 = X##_f1; \
213 _flo.bits.exp = X##_e; \
214 _flo.bits.sign = X##_s; \
216 (val) = _flo.flt; \
217 } while (0)
219 #define _FP_PACK_RAW_2_P(fs, val, X) \
220 do { \
221 union _FP_UNION_##fs *_flo = \
222 (union _FP_UNION_##fs *)(val); \
224 _flo->bits.frac0 = X##_f0; \
225 _flo->bits.frac1 = X##_f1; \
226 _flo->bits.exp = X##_e; \
227 _flo->bits.sign = X##_s; \
228 } while (0)
232 * Multiplication algorithms:
235 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
237 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
238 do { \
239 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
241 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
242 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
243 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
244 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
246 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
247 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
248 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
249 _FP_FRAC_WORD_4(_z,1)); \
250 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
251 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
252 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
253 _FP_FRAC_WORD_4(_z,1)); \
255 /* Normalize since we know where the msb of the multiplicands \
256 were (bit B), we know that the msb of the of the product is \
257 at either 2B or 2B-1. */ \
258 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
259 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
260 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
261 } while (0)
263 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
264 Do only 3 multiplications instead of four. This one is for machines
265 where multiplication is much more expensive than subtraction. */
267 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
268 do { \
269 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
270 _FP_W_TYPE _d; \
271 int _c1, _c2; \
273 _b_f0 = X##_f0 + X##_f1; \
274 _c1 = _b_f0 < X##_f0; \
275 _b_f1 = Y##_f0 + Y##_f1; \
276 _c2 = _b_f1 < Y##_f0; \
277 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
278 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
279 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
281 _b_f0 &= -_c2; \
282 _b_f1 &= -_c1; \
283 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
284 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
285 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
286 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
287 _b_f0); \
288 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
289 _b_f1); \
290 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
291 _FP_FRAC_WORD_4(_z,1), \
292 0, _d, _FP_FRAC_WORD_4(_z,0)); \
293 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
294 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
295 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
296 _c_f1, _c_f0, \
297 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
299 /* Normalize since we know where the msb of the multiplicands \
300 were (bit B), we know that the msb of the of the product is \
301 at either 2B or 2B-1. */ \
302 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
303 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
304 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
305 } while (0)
307 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
308 do { \
309 _FP_FRAC_DECL_4(_z); \
310 _FP_W_TYPE _x[2], _y[2]; \
311 _x[0] = X##_f0; _x[1] = X##_f1; \
312 _y[0] = Y##_f0; _y[1] = Y##_f1; \
314 mpn_mul_n(_z_f, _x, _y, 2); \
316 /* Normalize since we know where the msb of the multiplicands \
317 were (bit B), we know that the msb of the of the product is \
318 at either 2B or 2B-1. */ \
319 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
320 R##_f0 = _z_f[0]; \
321 R##_f1 = _z_f[1]; \
322 } while (0)
324 /* Do at most 120x120=240 bits multiplication using double floating
325 point multiplication. This is useful if floating point
326 multiplication has much bigger throughput than integer multiply.
327 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
328 between 106 and 120 only.
329 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
330 SETFETZ is a macro which will disable all FPU exceptions and set rounding
331 towards zero, RESETFE should optionally reset it back. */
333 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
334 do { \
335 static const double _const[] = { \
336 /* 2^-24 */ 5.9604644775390625e-08, \
337 /* 2^-48 */ 3.5527136788005009e-15, \
338 /* 2^-72 */ 2.1175823681357508e-22, \
339 /* 2^-96 */ 1.2621774483536189e-29, \
340 /* 2^28 */ 2.68435456e+08, \
341 /* 2^4 */ 1.600000e+01, \
342 /* 2^-20 */ 9.5367431640625e-07, \
343 /* 2^-44 */ 5.6843418860808015e-14, \
344 /* 2^-68 */ 3.3881317890172014e-21, \
345 /* 2^-92 */ 2.0194839173657902e-28, \
346 /* 2^-116 */ 1.2037062152420224e-35}; \
347 double _a240, _b240, _c240, _d240, _e240, _f240, \
348 _g240, _h240, _i240, _j240, _k240; \
349 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
350 _p240, _q240, _r240, _s240; \
351 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
353 if (wfracbits < 106 || wfracbits > 120) \
354 abort(); \
356 setfetz; \
358 _e240 = (double)(long)(X##_f0 & 0xffffff); \
359 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
360 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
361 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
362 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
363 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
364 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
365 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
366 _a240 = (double)(long)(X##_f1 >> 32); \
367 _f240 = (double)(long)(Y##_f1 >> 32); \
368 _e240 *= _const[3]; \
369 _j240 *= _const[3]; \
370 _d240 *= _const[2]; \
371 _i240 *= _const[2]; \
372 _c240 *= _const[1]; \
373 _h240 *= _const[1]; \
374 _b240 *= _const[0]; \
375 _g240 *= _const[0]; \
376 _s240.d = _e240*_j240;\
377 _r240.d = _d240*_j240 + _e240*_i240;\
378 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
379 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
380 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
381 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
382 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
383 _l240.d = _a240*_g240 + _b240*_f240; \
384 _k240 = _a240*_f240; \
385 _r240.d += _s240.d; \
386 _q240.d += _r240.d; \
387 _p240.d += _q240.d; \
388 _o240.d += _p240.d; \
389 _n240.d += _o240.d; \
390 _m240.d += _n240.d; \
391 _l240.d += _m240.d; \
392 _k240 += _l240.d; \
393 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
394 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
395 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
396 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
397 _o240.d += _const[7]; \
398 _n240.d += _const[6]; \
399 _m240.d += _const[5]; \
400 _l240.d += _const[4]; \
401 if (_s240.d != 0.0) _y240 = 1; \
402 if (_r240.d != 0.0) _y240 = 1; \
403 if (_q240.d != 0.0) _y240 = 1; \
404 if (_p240.d != 0.0) _y240 = 1; \
405 _t240 = (DItype)_k240; \
406 _u240 = _l240.i; \
407 _v240 = _m240.i; \
408 _w240 = _n240.i; \
409 _x240 = _o240.i; \
410 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
411 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
412 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
413 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
414 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
415 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
416 | _y240; \
417 resetfe; \
418 } while (0)
421 * Division algorithms:
424 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
425 do { \
426 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
427 if (_FP_FRAC_GT_2(X, Y)) \
429 _n_f2 = X##_f1 >> 1; \
430 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
431 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
433 else \
435 R##_e--; \
436 _n_f2 = X##_f1; \
437 _n_f1 = X##_f0; \
438 _n_f0 = 0; \
441 /* Normalize, i.e. make the most significant bit of the \
442 denominator set. */ \
443 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
445 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
446 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
447 _r_f0 = _n_f0; \
448 if (_FP_FRAC_GT_2(_m, _r)) \
450 R##_f1--; \
451 _FP_FRAC_ADD_2(_r, Y, _r); \
452 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
454 R##_f1--; \
455 _FP_FRAC_ADD_2(_r, Y, _r); \
458 _FP_FRAC_DEC_2(_r, _m); \
460 if (_r_f1 == Y##_f1) \
462 /* This is a special case, not an optimization \
463 (_r/Y##_f1 would not fit into UWtype). \
464 As _r is guaranteed to be < Y, R##_f0 can be either \
465 (UWtype)-1 or (UWtype)-2. But as we know what kind \
466 of bits it is (sticky, guard, round), we don't care. \
467 We also don't care what the reminder is, because the \
468 guard bit will be set anyway. -jj */ \
469 R##_f0 = -1; \
471 else \
473 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
474 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
475 _r_f0 = 0; \
476 if (_FP_FRAC_GT_2(_m, _r)) \
478 R##_f0--; \
479 _FP_FRAC_ADD_2(_r, Y, _r); \
480 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
482 R##_f0--; \
483 _FP_FRAC_ADD_2(_r, Y, _r); \
486 if (!_FP_FRAC_EQ_2(_r, _m)) \
487 R##_f0 |= _FP_WORK_STICKY; \
489 } while (0)
492 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
493 do { \
494 _FP_W_TYPE _x[4], _y[2], _z[4]; \
495 _y[0] = Y##_f0; _y[1] = Y##_f1; \
496 _x[0] = _x[3] = 0; \
497 if (_FP_FRAC_GT_2(X, Y)) \
499 R##_e++; \
500 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
501 X##_f1 >> (_FP_W_TYPE_SIZE - \
502 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
503 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
505 else \
507 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
508 X##_f1 >> (_FP_W_TYPE_SIZE - \
509 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
510 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
513 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
514 R##_f1 = _z[1]; \
515 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
516 } while (0)
520 * Square root algorithms:
521 * We have just one right now, maybe Newton approximation
522 * should be added for those machines where division is fast.
525 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
526 do { \
527 while (q) \
529 T##_f1 = S##_f1 + q; \
530 if (T##_f1 <= X##_f1) \
532 S##_f1 = T##_f1 + q; \
533 X##_f1 -= T##_f1; \
534 R##_f1 += q; \
536 _FP_FRAC_SLL_2(X, 1); \
537 q >>= 1; \
539 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
540 while (q != _FP_WORK_ROUND) \
542 T##_f0 = S##_f0 + q; \
543 T##_f1 = S##_f1; \
544 if (T##_f1 < X##_f1 || \
545 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
547 S##_f0 = T##_f0 + q; \
548 S##_f1 += (T##_f0 > S##_f0); \
549 _FP_FRAC_DEC_2(X, T); \
550 R##_f0 += q; \
552 _FP_FRAC_SLL_2(X, 1); \
553 q >>= 1; \
555 if (X##_f0 | X##_f1) \
557 if (S##_f1 < X##_f1 || \
558 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
559 R##_f0 |= _FP_WORK_ROUND; \
560 R##_f0 |= _FP_WORK_STICKY; \
562 } while (0)
566 * Assembly/disassembly for converting to/from integral types.
567 * No shifting or overflow handled here.
570 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
571 do { \
572 if (rsize <= _FP_W_TYPE_SIZE) \
573 r = X##_f0; \
574 else \
576 r = X##_f1; \
577 r <<= _FP_W_TYPE_SIZE; \
578 r += X##_f0; \
580 } while (0)
582 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
583 do { \
584 X##_f0 = r; \
585 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
586 } while (0)
589 * Convert FP values between word sizes
592 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
593 do { \
594 if (S##_c != FP_CLS_NAN) \
595 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
596 _FP_WFRACBITS_##sfs); \
597 else \
598 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
599 D##_f = S##_f0; \
600 } while (0)
602 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
603 do { \
604 D##_f0 = S##_f; \
605 D##_f1 = 0; \
606 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
607 } while (0)