* elf/cache.c (save_aux_cache): Free memory allocated for
[glibc.git] / soft-fp / op-2.h
blob3a3b3aa0691342798d41c5c99eb65c4e9a868934
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, write to the Free
31 Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
32 MA 02110-1301, USA. */
34 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
35 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
36 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
37 #define _FP_FRAC_HIGH_2(X) (X##_f1)
38 #define _FP_FRAC_LOW_2(X) (X##_f0)
39 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
41 #define _FP_FRAC_SLL_2(X,N) \
42 (void)(((N) < _FP_W_TYPE_SIZE) \
43 ? ({ \
44 if (__builtin_constant_p(N) && (N) == 1) \
45 { \
46 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
47 X##_f0 += X##_f0; \
48 } \
49 else \
50 { \
51 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
52 X##_f0 <<= (N); \
53 } \
54 0; \
55 }) \
56 : ({ \
57 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
58 X##_f0 = 0; \
59 }))
62 #define _FP_FRAC_SRL_2(X,N) \
63 (void)(((N) < _FP_W_TYPE_SIZE) \
64 ? ({ \
65 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
66 X##_f1 >>= (N); \
67 }) \
68 : ({ \
69 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
70 X##_f1 = 0; \
71 }))
73 /* Right shift with sticky-lsb. */
74 #define _FP_FRAC_SRST_2(X,S, N,sz) \
75 (void)(((N) < _FP_W_TYPE_SIZE) \
76 ? ({ \
77 S = (__builtin_constant_p(N) && (N) == 1 \
78 ? X##_f0 & 1 \
79 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
80 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
81 X##_f1 >>= (N); \
82 }) \
83 : ({ \
84 S = ((((N) == _FP_W_TYPE_SIZE \
85 ? 0 \
86 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
87 | X##_f0) != 0); \
88 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
89 X##_f1 = 0; \
90 }))
92 #define _FP_FRAC_SRS_2(X,N,sz) \
93 (void)(((N) < _FP_W_TYPE_SIZE) \
94 ? ({ \
95 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
96 (__builtin_constant_p(N) && (N) == 1 \
97 ? X##_f0 & 1 \
98 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
99 X##_f1 >>= (N); \
100 }) \
101 : ({ \
102 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
103 ((((N) == _FP_W_TYPE_SIZE \
104 ? 0 \
105 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
106 | X##_f0) != 0)); \
107 X##_f1 = 0; \
110 #define _FP_FRAC_ADDI_2(X,I) \
111 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
113 #define _FP_FRAC_ADD_2(R,X,Y) \
114 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
116 #define _FP_FRAC_SUB_2(R,X,Y) \
117 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
119 #define _FP_FRAC_DEC_2(X,Y) \
120 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
122 #define _FP_FRAC_CLZ_2(R,X) \
123 do { \
124 if (X##_f1) \
125 __FP_CLZ(R,X##_f1); \
126 else \
128 __FP_CLZ(R,X##_f0); \
129 R += _FP_W_TYPE_SIZE; \
131 } while(0)
133 /* Predicates */
134 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
135 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
136 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
137 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
138 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
139 #define _FP_FRAC_GT_2(X, Y) \
140 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
141 #define _FP_FRAC_GE_2(X, Y) \
142 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
144 #define _FP_ZEROFRAC_2 0, 0
145 #define _FP_MINFRAC_2 0, 1
146 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
149 * Internals
152 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
154 #define __FP_CLZ_2(R, xh, xl) \
155 do { \
156 if (xh) \
157 __FP_CLZ(R,xh); \
158 else \
160 __FP_CLZ(R,xl); \
161 R += _FP_W_TYPE_SIZE; \
163 } while(0)
165 #if 0
167 #ifndef __FP_FRAC_ADDI_2
168 #define __FP_FRAC_ADDI_2(xh, xl, i) \
169 (xh += ((xl += i) < i))
170 #endif
171 #ifndef __FP_FRAC_ADD_2
172 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
173 (rh = xh + yh + ((rl = xl + yl) < xl))
174 #endif
175 #ifndef __FP_FRAC_SUB_2
176 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
177 (rh = xh - yh - ((rl = xl - yl) > xl))
178 #endif
179 #ifndef __FP_FRAC_DEC_2
180 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
181 do { \
182 UWtype _t = xl; \
183 xh -= yh + ((xl -= yl) > _t); \
184 } while (0)
185 #endif
187 #else
189 #undef __FP_FRAC_ADDI_2
190 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
191 #undef __FP_FRAC_ADD_2
192 #define __FP_FRAC_ADD_2 add_ssaaaa
193 #undef __FP_FRAC_SUB_2
194 #define __FP_FRAC_SUB_2 sub_ddmmss
195 #undef __FP_FRAC_DEC_2
196 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
198 #endif
201 * Unpack the raw bits of a native fp value. Do not classify or
202 * normalize the data.
205 #define _FP_UNPACK_RAW_2(fs, X, val) \
206 do { \
207 union _FP_UNION_##fs _flo; _flo.flt = (val); \
209 X##_f0 = _flo.bits.frac0; \
210 X##_f1 = _flo.bits.frac1; \
211 X##_e = _flo.bits.exp; \
212 X##_s = _flo.bits.sign; \
213 } while (0)
215 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
216 do { \
217 union _FP_UNION_##fs *_flo = \
218 (union _FP_UNION_##fs *)(val); \
220 X##_f0 = _flo->bits.frac0; \
221 X##_f1 = _flo->bits.frac1; \
222 X##_e = _flo->bits.exp; \
223 X##_s = _flo->bits.sign; \
224 } while (0)
228 * Repack the raw bits of a native fp value.
231 #define _FP_PACK_RAW_2(fs, val, X) \
232 do { \
233 union _FP_UNION_##fs _flo; \
235 _flo.bits.frac0 = X##_f0; \
236 _flo.bits.frac1 = X##_f1; \
237 _flo.bits.exp = X##_e; \
238 _flo.bits.sign = X##_s; \
240 (val) = _flo.flt; \
241 } while (0)
243 #define _FP_PACK_RAW_2_P(fs, val, X) \
244 do { \
245 union _FP_UNION_##fs *_flo = \
246 (union _FP_UNION_##fs *)(val); \
248 _flo->bits.frac0 = X##_f0; \
249 _flo->bits.frac1 = X##_f1; \
250 _flo->bits.exp = X##_e; \
251 _flo->bits.sign = X##_s; \
252 } while (0)
256 * Multiplication algorithms:
259 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
261 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
262 do { \
263 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
265 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
266 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
267 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
268 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
270 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
271 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
272 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
273 _FP_FRAC_WORD_4(_z,1)); \
274 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
275 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
276 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
277 _FP_FRAC_WORD_4(_z,1)); \
279 /* Normalize since we know where the msb of the multiplicands \
280 were (bit B), we know that the msb of the of the product is \
281 at either 2B or 2B-1. */ \
282 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
283 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
284 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
285 } while (0)
287 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
288 Do only 3 multiplications instead of four. This one is for machines
289 where multiplication is much more expensive than subtraction. */
291 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
292 do { \
293 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
294 _FP_W_TYPE _d; \
295 int _c1, _c2; \
297 _b_f0 = X##_f0 + X##_f1; \
298 _c1 = _b_f0 < X##_f0; \
299 _b_f1 = Y##_f0 + Y##_f1; \
300 _c2 = _b_f1 < Y##_f0; \
301 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
302 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
303 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
305 _b_f0 &= -_c2; \
306 _b_f1 &= -_c1; \
307 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
308 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
309 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
310 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
311 _b_f0); \
312 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
313 _b_f1); \
314 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
315 _FP_FRAC_WORD_4(_z,1), \
316 0, _d, _FP_FRAC_WORD_4(_z,0)); \
317 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
318 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
319 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
320 _c_f1, _c_f0, \
321 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
323 /* Normalize since we know where the msb of the multiplicands \
324 were (bit B), we know that the msb of the of the product is \
325 at either 2B or 2B-1. */ \
326 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
327 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
328 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
329 } while (0)
331 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
332 do { \
333 _FP_FRAC_DECL_4(_z); \
334 _FP_W_TYPE _x[2], _y[2]; \
335 _x[0] = X##_f0; _x[1] = X##_f1; \
336 _y[0] = Y##_f0; _y[1] = Y##_f1; \
338 mpn_mul_n(_z_f, _x, _y, 2); \
340 /* Normalize since we know where the msb of the multiplicands \
341 were (bit B), we know that the msb of the of the product is \
342 at either 2B or 2B-1. */ \
343 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
344 R##_f0 = _z_f[0]; \
345 R##_f1 = _z_f[1]; \
346 } while (0)
348 /* Do at most 120x120=240 bits multiplication using double floating
349 point multiplication. This is useful if floating point
350 multiplication has much bigger throughput than integer multiply.
351 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
352 between 106 and 120 only.
353 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
354 SETFETZ is a macro which will disable all FPU exceptions and set rounding
355 towards zero, RESETFE should optionally reset it back. */
357 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
358 do { \
359 static const double _const[] = { \
360 /* 2^-24 */ 5.9604644775390625e-08, \
361 /* 2^-48 */ 3.5527136788005009e-15, \
362 /* 2^-72 */ 2.1175823681357508e-22, \
363 /* 2^-96 */ 1.2621774483536189e-29, \
364 /* 2^28 */ 2.68435456e+08, \
365 /* 2^4 */ 1.600000e+01, \
366 /* 2^-20 */ 9.5367431640625e-07, \
367 /* 2^-44 */ 5.6843418860808015e-14, \
368 /* 2^-68 */ 3.3881317890172014e-21, \
369 /* 2^-92 */ 2.0194839173657902e-28, \
370 /* 2^-116 */ 1.2037062152420224e-35}; \
371 double _a240, _b240, _c240, _d240, _e240, _f240, \
372 _g240, _h240, _i240, _j240, _k240; \
373 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
374 _p240, _q240, _r240, _s240; \
375 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
377 if (wfracbits < 106 || wfracbits > 120) \
378 abort(); \
380 setfetz; \
382 _e240 = (double)(long)(X##_f0 & 0xffffff); \
383 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
384 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
385 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
386 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
387 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
388 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
389 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
390 _a240 = (double)(long)(X##_f1 >> 32); \
391 _f240 = (double)(long)(Y##_f1 >> 32); \
392 _e240 *= _const[3]; \
393 _j240 *= _const[3]; \
394 _d240 *= _const[2]; \
395 _i240 *= _const[2]; \
396 _c240 *= _const[1]; \
397 _h240 *= _const[1]; \
398 _b240 *= _const[0]; \
399 _g240 *= _const[0]; \
400 _s240.d = _e240*_j240;\
401 _r240.d = _d240*_j240 + _e240*_i240;\
402 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
403 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
404 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
405 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
406 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
407 _l240.d = _a240*_g240 + _b240*_f240; \
408 _k240 = _a240*_f240; \
409 _r240.d += _s240.d; \
410 _q240.d += _r240.d; \
411 _p240.d += _q240.d; \
412 _o240.d += _p240.d; \
413 _n240.d += _o240.d; \
414 _m240.d += _n240.d; \
415 _l240.d += _m240.d; \
416 _k240 += _l240.d; \
417 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
418 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
419 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
420 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
421 _o240.d += _const[7]; \
422 _n240.d += _const[6]; \
423 _m240.d += _const[5]; \
424 _l240.d += _const[4]; \
425 if (_s240.d != 0.0) _y240 = 1; \
426 if (_r240.d != 0.0) _y240 = 1; \
427 if (_q240.d != 0.0) _y240 = 1; \
428 if (_p240.d != 0.0) _y240 = 1; \
429 _t240 = (DItype)_k240; \
430 _u240 = _l240.i; \
431 _v240 = _m240.i; \
432 _w240 = _n240.i; \
433 _x240 = _o240.i; \
434 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
435 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
436 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
437 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
438 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
439 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
440 | _y240; \
441 resetfe; \
442 } while (0)
445 * Division algorithms:
448 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
449 do { \
450 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
451 if (_FP_FRAC_GT_2(X, Y)) \
453 _n_f2 = X##_f1 >> 1; \
454 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
455 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
457 else \
459 R##_e--; \
460 _n_f2 = X##_f1; \
461 _n_f1 = X##_f0; \
462 _n_f0 = 0; \
465 /* Normalize, i.e. make the most significant bit of the \
466 denominator set. */ \
467 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
469 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
470 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
471 _r_f0 = _n_f0; \
472 if (_FP_FRAC_GT_2(_m, _r)) \
474 R##_f1--; \
475 _FP_FRAC_ADD_2(_r, Y, _r); \
476 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
478 R##_f1--; \
479 _FP_FRAC_ADD_2(_r, Y, _r); \
482 _FP_FRAC_DEC_2(_r, _m); \
484 if (_r_f1 == Y##_f1) \
486 /* This is a special case, not an optimization \
487 (_r/Y##_f1 would not fit into UWtype). \
488 As _r is guaranteed to be < Y, R##_f0 can be either \
489 (UWtype)-1 or (UWtype)-2. But as we know what kind \
490 of bits it is (sticky, guard, round), we don't care. \
491 We also don't care what the reminder is, because the \
492 guard bit will be set anyway. -jj */ \
493 R##_f0 = -1; \
495 else \
497 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
498 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
499 _r_f0 = 0; \
500 if (_FP_FRAC_GT_2(_m, _r)) \
502 R##_f0--; \
503 _FP_FRAC_ADD_2(_r, Y, _r); \
504 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
506 R##_f0--; \
507 _FP_FRAC_ADD_2(_r, Y, _r); \
510 if (!_FP_FRAC_EQ_2(_r, _m)) \
511 R##_f0 |= _FP_WORK_STICKY; \
513 } while (0)
516 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
517 do { \
518 _FP_W_TYPE _x[4], _y[2], _z[4]; \
519 _y[0] = Y##_f0; _y[1] = Y##_f1; \
520 _x[0] = _x[3] = 0; \
521 if (_FP_FRAC_GT_2(X, Y)) \
523 R##_e++; \
524 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
525 X##_f1 >> (_FP_W_TYPE_SIZE - \
526 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
527 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
529 else \
531 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
532 X##_f1 >> (_FP_W_TYPE_SIZE - \
533 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
534 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
537 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
538 R##_f1 = _z[1]; \
539 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
540 } while (0)
544 * Square root algorithms:
545 * We have just one right now, maybe Newton approximation
546 * should be added for those machines where division is fast.
549 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
550 do { \
551 while (q) \
553 T##_f1 = S##_f1 + q; \
554 if (T##_f1 <= X##_f1) \
556 S##_f1 = T##_f1 + q; \
557 X##_f1 -= T##_f1; \
558 R##_f1 += q; \
560 _FP_FRAC_SLL_2(X, 1); \
561 q >>= 1; \
563 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
564 while (q != _FP_WORK_ROUND) \
566 T##_f0 = S##_f0 + q; \
567 T##_f1 = S##_f1; \
568 if (T##_f1 < X##_f1 || \
569 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
571 S##_f0 = T##_f0 + q; \
572 S##_f1 += (T##_f0 > S##_f0); \
573 _FP_FRAC_DEC_2(X, T); \
574 R##_f0 += q; \
576 _FP_FRAC_SLL_2(X, 1); \
577 q >>= 1; \
579 if (X##_f0 | X##_f1) \
581 if (S##_f1 < X##_f1 || \
582 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
583 R##_f0 |= _FP_WORK_ROUND; \
584 R##_f0 |= _FP_WORK_STICKY; \
586 } while (0)
590 * Assembly/disassembly for converting to/from integral types.
591 * No shifting or overflow handled here.
594 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
595 (void)((rsize <= _FP_W_TYPE_SIZE) \
596 ? ({ r = X##_f0; }) \
597 : ({ \
598 r = X##_f1; \
599 r <<= _FP_W_TYPE_SIZE; \
600 r += X##_f0; \
603 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
604 do { \
605 X##_f0 = r; \
606 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
607 } while (0)
610 * Convert FP values between word sizes
613 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
615 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
617 #define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S)