* sysdeps/unix/sysv/linux/sparc/sparc64/dl-procinfo.c: Moved to ...
[glibc.git] / soft-fp / op-2.h
blobd8b89ff8438c084274bc5b58ad4826b0fc223668
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
20 You should have received a copy of the GNU Lesser General Public
21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 02111-1307 USA. */
25 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
26 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
27 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
28 #define _FP_FRAC_HIGH_2(X) (X##_f1)
29 #define _FP_FRAC_LOW_2(X) (X##_f0)
30 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
32 #define _FP_FRAC_SLL_2(X,N) \
33 (void)(((N) < _FP_W_TYPE_SIZE) \
34 ? ({ \
35 if (__builtin_constant_p(N) && (N) == 1) \
36 { \
37 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
38 X##_f0 += X##_f0; \
39 } \
40 else \
41 { \
42 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
43 X##_f0 <<= (N); \
44 } \
45 0; \
46 }) \
47 : ({ \
48 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
49 X##_f0 = 0; \
50 }))
53 #define _FP_FRAC_SRL_2(X,N) \
54 (void)(((N) < _FP_W_TYPE_SIZE) \
55 ? ({ \
56 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
57 X##_f1 >>= (N); \
58 }) \
59 : ({ \
60 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
61 X##_f1 = 0; \
62 }))
64 /* Right shift with sticky-lsb. */
65 #define _FP_FRAC_SRST_2(X,S, N,sz) \
66 (void)(((N) < _FP_W_TYPE_SIZE) \
67 ? ({ \
68 S = (__builtin_constant_p(N) && (N) == 1 \
69 ? X##_f0 & 1 \
70 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
71 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
72 X##_f1 >>= (N); \
73 }) \
74 : ({ \
75 S = ((((N) == _FP_W_TYPE_SIZE \
76 ? 0 \
77 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
78 | X##_f0) != 0); \
79 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
80 X##_f1 = 0; \
81 }))
83 #define _FP_FRAC_SRS_2(X,N,sz) \
84 (void)(((N) < _FP_W_TYPE_SIZE) \
85 ? ({ \
86 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
87 (__builtin_constant_p(N) && (N) == 1 \
88 ? X##_f0 & 1 \
89 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
90 X##_f1 >>= (N); \
91 }) \
92 : ({ \
93 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
94 ((((N) == _FP_W_TYPE_SIZE \
95 ? 0 \
96 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
97 | X##_f0) != 0)); \
98 X##_f1 = 0; \
99 }))
101 #define _FP_FRAC_ADDI_2(X,I) \
102 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
104 #define _FP_FRAC_ADD_2(R,X,Y) \
105 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
107 #define _FP_FRAC_SUB_2(R,X,Y) \
108 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
110 #define _FP_FRAC_DEC_2(X,Y) \
111 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
113 #define _FP_FRAC_CLZ_2(R,X) \
114 do { \
115 if (X##_f1) \
116 __FP_CLZ(R,X##_f1); \
117 else \
119 __FP_CLZ(R,X##_f0); \
120 R += _FP_W_TYPE_SIZE; \
122 } while(0)
124 /* Predicates */
125 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
126 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
127 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
128 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
129 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
130 #define _FP_FRAC_GT_2(X, Y) \
131 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
132 #define _FP_FRAC_GE_2(X, Y) \
133 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
135 #define _FP_ZEROFRAC_2 0, 0
136 #define _FP_MINFRAC_2 0, 1
137 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
140 * Internals
143 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
145 #define __FP_CLZ_2(R, xh, xl) \
146 do { \
147 if (xh) \
148 __FP_CLZ(R,xh); \
149 else \
151 __FP_CLZ(R,xl); \
152 R += _FP_W_TYPE_SIZE; \
154 } while(0)
156 #if 0
158 #ifndef __FP_FRAC_ADDI_2
159 #define __FP_FRAC_ADDI_2(xh, xl, i) \
160 (xh += ((xl += i) < i))
161 #endif
162 #ifndef __FP_FRAC_ADD_2
163 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
164 (rh = xh + yh + ((rl = xl + yl) < xl))
165 #endif
166 #ifndef __FP_FRAC_SUB_2
167 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
168 (rh = xh - yh - ((rl = xl - yl) > xl))
169 #endif
170 #ifndef __FP_FRAC_DEC_2
171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
172 do { \
173 UWtype _t = xl; \
174 xh -= yh + ((xl -= yl) > _t); \
175 } while (0)
176 #endif
178 #else
180 #undef __FP_FRAC_ADDI_2
181 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
182 #undef __FP_FRAC_ADD_2
183 #define __FP_FRAC_ADD_2 add_ssaaaa
184 #undef __FP_FRAC_SUB_2
185 #define __FP_FRAC_SUB_2 sub_ddmmss
186 #undef __FP_FRAC_DEC_2
187 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
189 #endif
192 * Unpack the raw bits of a native fp value. Do not classify or
193 * normalize the data.
196 #define _FP_UNPACK_RAW_2(fs, X, val) \
197 do { \
198 union _FP_UNION_##fs _flo; _flo.flt = (val); \
200 X##_f0 = _flo.bits.frac0; \
201 X##_f1 = _flo.bits.frac1; \
202 X##_e = _flo.bits.exp; \
203 X##_s = _flo.bits.sign; \
204 } while (0)
206 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
207 do { \
208 union _FP_UNION_##fs *_flo = \
209 (union _FP_UNION_##fs *)(val); \
211 X##_f0 = _flo->bits.frac0; \
212 X##_f1 = _flo->bits.frac1; \
213 X##_e = _flo->bits.exp; \
214 X##_s = _flo->bits.sign; \
215 } while (0)
219 * Repack the raw bits of a native fp value.
222 #define _FP_PACK_RAW_2(fs, val, X) \
223 do { \
224 union _FP_UNION_##fs _flo; \
226 _flo.bits.frac0 = X##_f0; \
227 _flo.bits.frac1 = X##_f1; \
228 _flo.bits.exp = X##_e; \
229 _flo.bits.sign = X##_s; \
231 (val) = _flo.flt; \
232 } while (0)
234 #define _FP_PACK_RAW_2_P(fs, val, X) \
235 do { \
236 union _FP_UNION_##fs *_flo = \
237 (union _FP_UNION_##fs *)(val); \
239 _flo->bits.frac0 = X##_f0; \
240 _flo->bits.frac1 = X##_f1; \
241 _flo->bits.exp = X##_e; \
242 _flo->bits.sign = X##_s; \
243 } while (0)
247 * Multiplication algorithms:
250 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
252 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
253 do { \
254 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
256 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
257 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
258 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
259 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
261 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
262 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
263 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
264 _FP_FRAC_WORD_4(_z,1)); \
265 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
266 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
267 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
268 _FP_FRAC_WORD_4(_z,1)); \
270 /* Normalize since we know where the msb of the multiplicands \
271 were (bit B), we know that the msb of the of the product is \
272 at either 2B or 2B-1. */ \
273 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
274 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
275 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
276 } while (0)
278 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
279 Do only 3 multiplications instead of four. This one is for machines
280 where multiplication is much more expensive than subtraction. */
282 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
283 do { \
284 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
285 _FP_W_TYPE _d; \
286 int _c1, _c2; \
288 _b_f0 = X##_f0 + X##_f1; \
289 _c1 = _b_f0 < X##_f0; \
290 _b_f1 = Y##_f0 + Y##_f1; \
291 _c2 = _b_f1 < Y##_f0; \
292 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
293 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
294 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
296 _b_f0 &= -_c2; \
297 _b_f1 &= -_c1; \
298 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
299 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
300 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
301 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
302 _b_f0); \
303 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
304 _b_f1); \
305 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
306 _FP_FRAC_WORD_4(_z,1), \
307 0, _d, _FP_FRAC_WORD_4(_z,0)); \
308 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
309 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
310 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
311 _c_f1, _c_f0, \
312 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
314 /* Normalize since we know where the msb of the multiplicands \
315 were (bit B), we know that the msb of the of the product is \
316 at either 2B or 2B-1. */ \
317 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
318 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
319 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
320 } while (0)
322 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
323 do { \
324 _FP_FRAC_DECL_4(_z); \
325 _FP_W_TYPE _x[2], _y[2]; \
326 _x[0] = X##_f0; _x[1] = X##_f1; \
327 _y[0] = Y##_f0; _y[1] = Y##_f1; \
329 mpn_mul_n(_z_f, _x, _y, 2); \
331 /* Normalize since we know where the msb of the multiplicands \
332 were (bit B), we know that the msb of the of the product is \
333 at either 2B or 2B-1. */ \
334 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
335 R##_f0 = _z_f[0]; \
336 R##_f1 = _z_f[1]; \
337 } while (0)
339 /* Do at most 120x120=240 bits multiplication using double floating
340 point multiplication. This is useful if floating point
341 multiplication has much bigger throughput than integer multiply.
342 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
343 between 106 and 120 only.
344 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
345 SETFETZ is a macro which will disable all FPU exceptions and set rounding
346 towards zero, RESETFE should optionally reset it back. */
348 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
349 do { \
350 static const double _const[] = { \
351 /* 2^-24 */ 5.9604644775390625e-08, \
352 /* 2^-48 */ 3.5527136788005009e-15, \
353 /* 2^-72 */ 2.1175823681357508e-22, \
354 /* 2^-96 */ 1.2621774483536189e-29, \
355 /* 2^28 */ 2.68435456e+08, \
356 /* 2^4 */ 1.600000e+01, \
357 /* 2^-20 */ 9.5367431640625e-07, \
358 /* 2^-44 */ 5.6843418860808015e-14, \
359 /* 2^-68 */ 3.3881317890172014e-21, \
360 /* 2^-92 */ 2.0194839173657902e-28, \
361 /* 2^-116 */ 1.2037062152420224e-35}; \
362 double _a240, _b240, _c240, _d240, _e240, _f240, \
363 _g240, _h240, _i240, _j240, _k240; \
364 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
365 _p240, _q240, _r240, _s240; \
366 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
368 if (wfracbits < 106 || wfracbits > 120) \
369 abort(); \
371 setfetz; \
373 _e240 = (double)(long)(X##_f0 & 0xffffff); \
374 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
375 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
376 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
377 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
378 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
379 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
380 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
381 _a240 = (double)(long)(X##_f1 >> 32); \
382 _f240 = (double)(long)(Y##_f1 >> 32); \
383 _e240 *= _const[3]; \
384 _j240 *= _const[3]; \
385 _d240 *= _const[2]; \
386 _i240 *= _const[2]; \
387 _c240 *= _const[1]; \
388 _h240 *= _const[1]; \
389 _b240 *= _const[0]; \
390 _g240 *= _const[0]; \
391 _s240.d = _e240*_j240;\
392 _r240.d = _d240*_j240 + _e240*_i240;\
393 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
394 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
395 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
396 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
397 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
398 _l240.d = _a240*_g240 + _b240*_f240; \
399 _k240 = _a240*_f240; \
400 _r240.d += _s240.d; \
401 _q240.d += _r240.d; \
402 _p240.d += _q240.d; \
403 _o240.d += _p240.d; \
404 _n240.d += _o240.d; \
405 _m240.d += _n240.d; \
406 _l240.d += _m240.d; \
407 _k240 += _l240.d; \
408 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
409 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
410 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
411 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
412 _o240.d += _const[7]; \
413 _n240.d += _const[6]; \
414 _m240.d += _const[5]; \
415 _l240.d += _const[4]; \
416 if (_s240.d != 0.0) _y240 = 1; \
417 if (_r240.d != 0.0) _y240 = 1; \
418 if (_q240.d != 0.0) _y240 = 1; \
419 if (_p240.d != 0.0) _y240 = 1; \
420 _t240 = (DItype)_k240; \
421 _u240 = _l240.i; \
422 _v240 = _m240.i; \
423 _w240 = _n240.i; \
424 _x240 = _o240.i; \
425 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
426 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
427 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
428 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
429 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
430 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
431 | _y240; \
432 resetfe; \
433 } while (0)
436 * Division algorithms:
439 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
440 do { \
441 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
442 if (_FP_FRAC_GT_2(X, Y)) \
444 _n_f2 = X##_f1 >> 1; \
445 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
446 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
448 else \
450 R##_e--; \
451 _n_f2 = X##_f1; \
452 _n_f1 = X##_f0; \
453 _n_f0 = 0; \
456 /* Normalize, i.e. make the most significant bit of the \
457 denominator set. */ \
458 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
460 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
461 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
462 _r_f0 = _n_f0; \
463 if (_FP_FRAC_GT_2(_m, _r)) \
465 R##_f1--; \
466 _FP_FRAC_ADD_2(_r, Y, _r); \
467 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
469 R##_f1--; \
470 _FP_FRAC_ADD_2(_r, Y, _r); \
473 _FP_FRAC_DEC_2(_r, _m); \
475 if (_r_f1 == Y##_f1) \
477 /* This is a special case, not an optimization \
478 (_r/Y##_f1 would not fit into UWtype). \
479 As _r is guaranteed to be < Y, R##_f0 can be either \
480 (UWtype)-1 or (UWtype)-2. But as we know what kind \
481 of bits it is (sticky, guard, round), we don't care. \
482 We also don't care what the reminder is, because the \
483 guard bit will be set anyway. -jj */ \
484 R##_f0 = -1; \
486 else \
488 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
489 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
490 _r_f0 = 0; \
491 if (_FP_FRAC_GT_2(_m, _r)) \
493 R##_f0--; \
494 _FP_FRAC_ADD_2(_r, Y, _r); \
495 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
497 R##_f0--; \
498 _FP_FRAC_ADD_2(_r, Y, _r); \
501 if (!_FP_FRAC_EQ_2(_r, _m)) \
502 R##_f0 |= _FP_WORK_STICKY; \
504 } while (0)
507 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
508 do { \
509 _FP_W_TYPE _x[4], _y[2], _z[4]; \
510 _y[0] = Y##_f0; _y[1] = Y##_f1; \
511 _x[0] = _x[3] = 0; \
512 if (_FP_FRAC_GT_2(X, Y)) \
514 R##_e++; \
515 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
516 X##_f1 >> (_FP_W_TYPE_SIZE - \
517 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
518 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
520 else \
522 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
523 X##_f1 >> (_FP_W_TYPE_SIZE - \
524 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
525 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
528 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
529 R##_f1 = _z[1]; \
530 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
531 } while (0)
535 * Square root algorithms:
536 * We have just one right now, maybe Newton approximation
537 * should be added for those machines where division is fast.
540 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
541 do { \
542 while (q) \
544 T##_f1 = S##_f1 + q; \
545 if (T##_f1 <= X##_f1) \
547 S##_f1 = T##_f1 + q; \
548 X##_f1 -= T##_f1; \
549 R##_f1 += q; \
551 _FP_FRAC_SLL_2(X, 1); \
552 q >>= 1; \
554 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
555 while (q != _FP_WORK_ROUND) \
557 T##_f0 = S##_f0 + q; \
558 T##_f1 = S##_f1; \
559 if (T##_f1 < X##_f1 || \
560 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
562 S##_f0 = T##_f0 + q; \
563 S##_f1 += (T##_f0 > S##_f0); \
564 _FP_FRAC_DEC_2(X, T); \
565 R##_f0 += q; \
567 _FP_FRAC_SLL_2(X, 1); \
568 q >>= 1; \
570 if (X##_f0 | X##_f1) \
572 if (S##_f1 < X##_f1 || \
573 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
574 R##_f0 |= _FP_WORK_ROUND; \
575 R##_f0 |= _FP_WORK_STICKY; \
577 } while (0)
581 * Assembly/disassembly for converting to/from integral types.
582 * No shifting or overflow handled here.
585 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
586 (void)((rsize <= _FP_W_TYPE_SIZE) \
587 ? ({ r = X##_f0; }) \
588 : ({ \
589 r = X##_f1; \
590 r <<= _FP_W_TYPE_SIZE; \
591 r += X##_f0; \
594 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
595 do { \
596 X##_f0 = r; \
597 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
598 } while (0)
601 * Convert FP values between word sizes
604 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
606 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))