Update.
[glibc.git] / soft-fp / op-2.h
blob4a5add55cbb77dc76342ce6f420257caefd57eff
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
20 You should have received a copy of the GNU Lesser General Public
21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 02111-1307 USA. */
25 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
26 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
27 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
28 #define _FP_FRAC_HIGH_2(X) (X##_f1)
29 #define _FP_FRAC_LOW_2(X) (X##_f0)
30 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
32 #define _FP_FRAC_SLL_2(X,N) \
33 do { \
34 if ((N) < _FP_W_TYPE_SIZE) \
35 { \
36 if (__builtin_constant_p(N) && (N) == 1) \
37 { \
38 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
39 X##_f0 += X##_f0; \
40 } \
41 else \
42 { \
43 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
44 X##_f0 <<= (N); \
45 } \
46 } \
47 else \
48 { \
49 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
50 X##_f0 = 0; \
51 } \
52 } while (0)
54 #define _FP_FRAC_SRL_2(X,N) \
55 do { \
56 if ((N) < _FP_W_TYPE_SIZE) \
57 { \
58 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
59 X##_f1 >>= (N); \
60 } \
61 else \
62 { \
63 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
64 X##_f1 = 0; \
65 } \
66 } while (0)
68 /* Right shift with sticky-lsb. */
69 #define _FP_FRAC_SRS_2(X,N,sz) \
70 do { \
71 if ((N) < _FP_W_TYPE_SIZE) \
72 { \
73 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
74 (__builtin_constant_p(N) && (N) == 1 \
75 ? X##_f0 & 1 \
76 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
77 X##_f1 >>= (N); \
78 } \
79 else \
80 { \
81 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
82 (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | \
83 X##_f0) != 0)); \
84 X##_f1 = 0; \
85 } \
86 } while (0)
88 #define _FP_FRAC_ADDI_2(X,I) \
89 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
91 #define _FP_FRAC_ADD_2(R,X,Y) \
92 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
94 #define _FP_FRAC_SUB_2(R,X,Y) \
95 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
97 #define _FP_FRAC_DEC_2(X,Y) \
98 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
100 #define _FP_FRAC_CLZ_2(R,X) \
101 do { \
102 if (X##_f1) \
103 __FP_CLZ(R,X##_f1); \
104 else \
106 __FP_CLZ(R,X##_f0); \
107 R += _FP_W_TYPE_SIZE; \
109 } while(0)
111 /* Predicates */
112 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
113 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
114 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
115 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
116 #define _FP_FRAC_GT_2(X, Y) \
117 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 > Y##_f0)
118 #define _FP_FRAC_GE_2(X, Y) \
119 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)
121 #define _FP_ZEROFRAC_2 0, 0
122 #define _FP_MINFRAC_2 0, 1
123 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
126 * Internals
129 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
131 #define __FP_CLZ_2(R, xh, xl) \
132 do { \
133 if (xh) \
134 __FP_CLZ(R,xh); \
135 else \
137 __FP_CLZ(R,xl); \
138 R += _FP_W_TYPE_SIZE; \
140 } while(0)
142 #if 0
144 #ifndef __FP_FRAC_ADDI_2
145 #define __FP_FRAC_ADDI_2(xh, xl, i) \
146 (xh += ((xl += i) < i))
147 #endif
148 #ifndef __FP_FRAC_ADD_2
149 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
150 (rh = xh + yh + ((rl = xl + yl) < xl))
151 #endif
152 #ifndef __FP_FRAC_SUB_2
153 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
154 (rh = xh - yh - ((rl = xl - yl) > xl))
155 #endif
156 #ifndef __FP_FRAC_DEC_2
157 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
158 do { \
159 UWtype _t = xl; \
160 xh -= yh + ((xl -= yl) > _t); \
161 } while (0)
162 #endif
164 #else
166 #undef __FP_FRAC_ADDI_2
167 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
168 #undef __FP_FRAC_ADD_2
169 #define __FP_FRAC_ADD_2 add_ssaaaa
170 #undef __FP_FRAC_SUB_2
171 #define __FP_FRAC_SUB_2 sub_ddmmss
172 #undef __FP_FRAC_DEC_2
173 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
175 #endif
178 * Unpack the raw bits of a native fp value. Do not classify or
179 * normalize the data.
182 #define _FP_UNPACK_RAW_2(fs, X, val) \
183 do { \
184 union _FP_UNION_##fs _flo; _flo.flt = (val); \
186 X##_f0 = _flo.bits.frac0; \
187 X##_f1 = _flo.bits.frac1; \
188 X##_e = _flo.bits.exp; \
189 X##_s = _flo.bits.sign; \
190 } while (0)
192 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
193 do { \
194 union _FP_UNION_##fs *_flo = \
195 (union _FP_UNION_##fs *)(val); \
197 X##_f0 = _flo->bits.frac0; \
198 X##_f1 = _flo->bits.frac1; \
199 X##_e = _flo->bits.exp; \
200 X##_s = _flo->bits.sign; \
201 } while (0)
205 * Repack the raw bits of a native fp value.
208 #define _FP_PACK_RAW_2(fs, val, X) \
209 do { \
210 union _FP_UNION_##fs _flo; \
212 _flo.bits.frac0 = X##_f0; \
213 _flo.bits.frac1 = X##_f1; \
214 _flo.bits.exp = X##_e; \
215 _flo.bits.sign = X##_s; \
217 (val) = _flo.flt; \
218 } while (0)
220 #define _FP_PACK_RAW_2_P(fs, val, X) \
221 do { \
222 union _FP_UNION_##fs *_flo = \
223 (union _FP_UNION_##fs *)(val); \
225 _flo->bits.frac0 = X##_f0; \
226 _flo->bits.frac1 = X##_f1; \
227 _flo->bits.exp = X##_e; \
228 _flo->bits.sign = X##_s; \
229 } while (0)
233 * Multiplication algorithms:
236 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
238 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
239 do { \
240 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
242 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
243 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
244 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
245 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
247 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
248 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
249 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
250 _FP_FRAC_WORD_4(_z,1)); \
251 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
252 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
253 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
254 _FP_FRAC_WORD_4(_z,1)); \
256 /* Normalize since we know where the msb of the multiplicands \
257 were (bit B), we know that the msb of the of the product is \
258 at either 2B or 2B-1. */ \
259 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
260 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
261 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
262 } while (0)
264 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
265 Do only 3 multiplications instead of four. This one is for machines
266 where multiplication is much more expensive than subtraction. */
268 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
269 do { \
270 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
271 _FP_W_TYPE _d; \
272 int _c1, _c2; \
274 _b_f0 = X##_f0 + X##_f1; \
275 _c1 = _b_f0 < X##_f0; \
276 _b_f1 = Y##_f0 + Y##_f1; \
277 _c2 = _b_f1 < Y##_f0; \
278 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
279 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
280 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
282 _b_f0 &= -_c2; \
283 _b_f1 &= -_c1; \
284 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
285 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
286 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
287 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
288 _b_f0); \
289 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
290 _b_f1); \
291 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
292 _FP_FRAC_WORD_4(_z,1), \
293 0, _d, _FP_FRAC_WORD_4(_z,0)); \
294 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
295 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
296 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
297 _c_f1, _c_f0, \
298 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
300 /* Normalize since we know where the msb of the multiplicands \
301 were (bit B), we know that the msb of the of the product is \
302 at either 2B or 2B-1. */ \
303 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
304 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
305 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
306 } while (0)
308 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
309 do { \
310 _FP_FRAC_DECL_4(_z); \
311 _FP_W_TYPE _x[2], _y[2]; \
312 _x[0] = X##_f0; _x[1] = X##_f1; \
313 _y[0] = Y##_f0; _y[1] = Y##_f1; \
315 mpn_mul_n(_z_f, _x, _y, 2); \
317 /* Normalize since we know where the msb of the multiplicands \
318 were (bit B), we know that the msb of the of the product is \
319 at either 2B or 2B-1. */ \
320 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
321 R##_f0 = _z_f[0]; \
322 R##_f1 = _z_f[1]; \
323 } while (0)
325 /* Do at most 120x120=240 bits multiplication using double floating
326 point multiplication. This is useful if floating point
327 multiplication has much bigger throughput than integer multiply.
328 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
329 between 106 and 120 only.
330 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
331 SETFETZ is a macro which will disable all FPU exceptions and set rounding
332 towards zero, RESETFE should optionally reset it back. */
334 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
335 do { \
336 static const double _const[] = { \
337 /* 2^-24 */ 5.9604644775390625e-08, \
338 /* 2^-48 */ 3.5527136788005009e-15, \
339 /* 2^-72 */ 2.1175823681357508e-22, \
340 /* 2^-96 */ 1.2621774483536189e-29, \
341 /* 2^28 */ 2.68435456e+08, \
342 /* 2^4 */ 1.600000e+01, \
343 /* 2^-20 */ 9.5367431640625e-07, \
344 /* 2^-44 */ 5.6843418860808015e-14, \
345 /* 2^-68 */ 3.3881317890172014e-21, \
346 /* 2^-92 */ 2.0194839173657902e-28, \
347 /* 2^-116 */ 1.2037062152420224e-35}; \
348 double _a240, _b240, _c240, _d240, _e240, _f240, \
349 _g240, _h240, _i240, _j240, _k240; \
350 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
351 _p240, _q240, _r240, _s240; \
352 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
354 if (wfracbits < 106 || wfracbits > 120) \
355 abort(); \
357 setfetz; \
359 _e240 = (double)(long)(X##_f0 & 0xffffff); \
360 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
361 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
362 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
363 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
364 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
365 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
366 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
367 _a240 = (double)(long)(X##_f1 >> 32); \
368 _f240 = (double)(long)(Y##_f1 >> 32); \
369 _e240 *= _const[3]; \
370 _j240 *= _const[3]; \
371 _d240 *= _const[2]; \
372 _i240 *= _const[2]; \
373 _c240 *= _const[1]; \
374 _h240 *= _const[1]; \
375 _b240 *= _const[0]; \
376 _g240 *= _const[0]; \
377 _s240.d = _e240*_j240;\
378 _r240.d = _d240*_j240 + _e240*_i240;\
379 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
380 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
381 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
382 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
383 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
384 _l240.d = _a240*_g240 + _b240*_f240; \
385 _k240 = _a240*_f240; \
386 _r240.d += _s240.d; \
387 _q240.d += _r240.d; \
388 _p240.d += _q240.d; \
389 _o240.d += _p240.d; \
390 _n240.d += _o240.d; \
391 _m240.d += _n240.d; \
392 _l240.d += _m240.d; \
393 _k240 += _l240.d; \
394 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
395 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
396 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
397 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
398 _o240.d += _const[7]; \
399 _n240.d += _const[6]; \
400 _m240.d += _const[5]; \
401 _l240.d += _const[4]; \
402 if (_s240.d != 0.0) _y240 = 1; \
403 if (_r240.d != 0.0) _y240 = 1; \
404 if (_q240.d != 0.0) _y240 = 1; \
405 if (_p240.d != 0.0) _y240 = 1; \
406 _t240 = (DItype)_k240; \
407 _u240 = _l240.i; \
408 _v240 = _m240.i; \
409 _w240 = _n240.i; \
410 _x240 = _o240.i; \
411 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
412 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
413 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
414 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
415 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
416 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
417 | _y240; \
418 resetfe; \
419 } while (0)
422 * Division algorithms:
425 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
426 do { \
427 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
428 if (_FP_FRAC_GT_2(X, Y)) \
430 _n_f2 = X##_f1 >> 1; \
431 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
432 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
434 else \
436 R##_e--; \
437 _n_f2 = X##_f1; \
438 _n_f1 = X##_f0; \
439 _n_f0 = 0; \
442 /* Normalize, i.e. make the most significant bit of the \
443 denominator set. */ \
444 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
446 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
447 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
448 _r_f0 = _n_f0; \
449 if (_FP_FRAC_GT_2(_m, _r)) \
451 R##_f1--; \
452 _FP_FRAC_ADD_2(_r, Y, _r); \
453 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
455 R##_f1--; \
456 _FP_FRAC_ADD_2(_r, Y, _r); \
459 _FP_FRAC_DEC_2(_r, _m); \
461 if (_r_f1 == Y##_f1) \
463 /* This is a special case, not an optimization \
464 (_r/Y##_f1 would not fit into UWtype). \
465 As _r is guaranteed to be < Y, R##_f0 can be either \
466 (UWtype)-1 or (UWtype)-2. But as we know what kind \
467 of bits it is (sticky, guard, round), we don't care. \
468 We also don't care what the reminder is, because the \
469 guard bit will be set anyway. -jj */ \
470 R##_f0 = -1; \
472 else \
474 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
475 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
476 _r_f0 = 0; \
477 if (_FP_FRAC_GT_2(_m, _r)) \
479 R##_f0--; \
480 _FP_FRAC_ADD_2(_r, Y, _r); \
481 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
483 R##_f0--; \
484 _FP_FRAC_ADD_2(_r, Y, _r); \
487 if (!_FP_FRAC_EQ_2(_r, _m)) \
488 R##_f0 |= _FP_WORK_STICKY; \
490 } while (0)
493 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
494 do { \
495 _FP_W_TYPE _x[4], _y[2], _z[4]; \
496 _y[0] = Y##_f0; _y[1] = Y##_f1; \
497 _x[0] = _x[3] = 0; \
498 if (_FP_FRAC_GT_2(X, Y)) \
500 R##_e++; \
501 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
502 X##_f1 >> (_FP_W_TYPE_SIZE - \
503 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
504 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
506 else \
508 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
509 X##_f1 >> (_FP_W_TYPE_SIZE - \
510 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
511 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
514 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
515 R##_f1 = _z[1]; \
516 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
517 } while (0)
521 * Square root algorithms:
522 * We have just one right now, maybe Newton approximation
523 * should be added for those machines where division is fast.
526 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
527 do { \
528 while (q) \
530 T##_f1 = S##_f1 + q; \
531 if (T##_f1 <= X##_f1) \
533 S##_f1 = T##_f1 + q; \
534 X##_f1 -= T##_f1; \
535 R##_f1 += q; \
537 _FP_FRAC_SLL_2(X, 1); \
538 q >>= 1; \
540 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
541 while (q != _FP_WORK_ROUND) \
543 T##_f0 = S##_f0 + q; \
544 T##_f1 = S##_f1; \
545 if (T##_f1 < X##_f1 || \
546 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
548 S##_f0 = T##_f0 + q; \
549 S##_f1 += (T##_f0 > S##_f0); \
550 _FP_FRAC_DEC_2(X, T); \
551 R##_f0 += q; \
553 _FP_FRAC_SLL_2(X, 1); \
554 q >>= 1; \
556 if (X##_f0 | X##_f1) \
558 if (S##_f1 < X##_f1 || \
559 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
560 R##_f0 |= _FP_WORK_ROUND; \
561 R##_f0 |= _FP_WORK_STICKY; \
563 } while (0)
567 * Assembly/disassembly for converting to/from integral types.
568 * No shifting or overflow handled here.
571 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
572 do { \
573 if (rsize <= _FP_W_TYPE_SIZE) \
574 r = X##_f0; \
575 else \
577 r = X##_f1; \
578 r <<= _FP_W_TYPE_SIZE; \
579 r += X##_f0; \
581 } while (0)
583 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
584 do { \
585 X##_f0 = r; \
586 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
587 } while (0)
590 * Convert FP values between word sizes
593 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
594 do { \
595 if (S##_c != FP_CLS_NAN) \
596 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
597 _FP_WFRACBITS_##sfs); \
598 else \
599 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
600 D##_f = S##_f0; \
601 } while (0)
603 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
604 do { \
605 D##_f0 = S##_f; \
606 D##_f1 = 0; \
607 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
608 } while (0)