Separate internal state between getXXent and getXXbyYY NSS calls (bug 18007)
[glibc.git] / soft-fp / op-2.h
blob20088227e1915a4ce88b3ce82faa6d41e3cb4a16
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997-2013 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
36 #define _FP_FRAC_HIGH_2(X) (X##_f1)
37 #define _FP_FRAC_LOW_2(X) (X##_f0)
38 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
40 #define _FP_FRAC_SLL_2(X,N) \
41 (void)(((N) < _FP_W_TYPE_SIZE) \
42 ? ({ \
43 if (__builtin_constant_p(N) && (N) == 1) \
44 { \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
46 X##_f0 += X##_f0; \
47 } \
48 else \
49 { \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 X##_f0 <<= (N); \
52 } \
53 0; \
54 }) \
55 : ({ \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57 X##_f0 = 0; \
58 }))
61 #define _FP_FRAC_SRL_2(X,N) \
62 (void)(((N) < _FP_W_TYPE_SIZE) \
63 ? ({ \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 X##_f1 >>= (N); \
66 }) \
67 : ({ \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69 X##_f1 = 0; \
70 }))
72 /* Right shift with sticky-lsb. */
73 #define _FP_FRAC_SRST_2(X,S, N,sz) \
74 (void)(((N) < _FP_W_TYPE_SIZE) \
75 ? ({ \
76 S = (__builtin_constant_p(N) && (N) == 1 \
77 ? X##_f0 & 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 X##_f1 >>= (N); \
81 }) \
82 : ({ \
83 S = ((((N) == _FP_W_TYPE_SIZE \
84 ? 0 \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86 | X##_f0) != 0); \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88 X##_f1 = 0; \
89 }))
91 #define _FP_FRAC_SRS_2(X,N,sz) \
92 (void)(((N) < _FP_W_TYPE_SIZE) \
93 ? ({ \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
95 (__builtin_constant_p(N) && (N) == 1 \
96 ? X##_f0 & 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 X##_f1 >>= (N); \
99 }) \
100 : ({ \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
102 ((((N) == _FP_W_TYPE_SIZE \
103 ? 0 \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105 | X##_f0) != 0)); \
106 X##_f1 = 0; \
109 #define _FP_FRAC_ADDI_2(X,I) \
110 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
112 #define _FP_FRAC_ADD_2(R,X,Y) \
113 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
115 #define _FP_FRAC_SUB_2(R,X,Y) \
116 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118 #define _FP_FRAC_DEC_2(X,Y) \
119 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
121 #define _FP_FRAC_CLZ_2(R,X) \
122 do { \
123 if (X##_f1) \
124 __FP_CLZ(R,X##_f1); \
125 else \
127 __FP_CLZ(R,X##_f0); \
128 R += _FP_W_TYPE_SIZE; \
130 } while(0)
132 /* Predicates */
133 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
134 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
135 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
136 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
137 #define _FP_FRAC_HIGHBIT_DW_2(fs,X) \
138 (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
139 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
140 #define _FP_FRAC_GT_2(X, Y) \
141 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
142 #define _FP_FRAC_GE_2(X, Y) \
143 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
145 #define _FP_ZEROFRAC_2 0, 0
146 #define _FP_MINFRAC_2 0, 1
147 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
150 * Internals
153 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
155 #define __FP_CLZ_2(R, xh, xl) \
156 do { \
157 if (xh) \
158 __FP_CLZ(R,xh); \
159 else \
161 __FP_CLZ(R,xl); \
162 R += _FP_W_TYPE_SIZE; \
164 } while(0)
166 #if 0
168 #ifndef __FP_FRAC_ADDI_2
169 #define __FP_FRAC_ADDI_2(xh, xl, i) \
170 (xh += ((xl += i) < i))
171 #endif
172 #ifndef __FP_FRAC_ADD_2
173 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
174 (rh = xh + yh + ((rl = xl + yl) < xl))
175 #endif
176 #ifndef __FP_FRAC_SUB_2
177 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
178 (rh = xh - yh - ((rl = xl - yl) > xl))
179 #endif
180 #ifndef __FP_FRAC_DEC_2
181 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
182 do { \
183 UWtype _t = xl; \
184 xh -= yh + ((xl -= yl) > _t); \
185 } while (0)
186 #endif
188 #else
190 #undef __FP_FRAC_ADDI_2
191 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
192 #undef __FP_FRAC_ADD_2
193 #define __FP_FRAC_ADD_2 add_ssaaaa
194 #undef __FP_FRAC_SUB_2
195 #define __FP_FRAC_SUB_2 sub_ddmmss
196 #undef __FP_FRAC_DEC_2
197 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
199 #endif
202 * Unpack the raw bits of a native fp value. Do not classify or
203 * normalize the data.
206 #define _FP_UNPACK_RAW_2(fs, X, val) \
207 do { \
208 union _FP_UNION_##fs _flo; _flo.flt = (val); \
210 X##_f0 = _flo.bits.frac0; \
211 X##_f1 = _flo.bits.frac1; \
212 X##_e = _flo.bits.exp; \
213 X##_s = _flo.bits.sign; \
214 } while (0)
216 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
217 do { \
218 union _FP_UNION_##fs *_flo = \
219 (union _FP_UNION_##fs *)(val); \
221 X##_f0 = _flo->bits.frac0; \
222 X##_f1 = _flo->bits.frac1; \
223 X##_e = _flo->bits.exp; \
224 X##_s = _flo->bits.sign; \
225 } while (0)
229 * Repack the raw bits of a native fp value.
232 #define _FP_PACK_RAW_2(fs, val, X) \
233 do { \
234 union _FP_UNION_##fs _flo; \
236 _flo.bits.frac0 = X##_f0; \
237 _flo.bits.frac1 = X##_f1; \
238 _flo.bits.exp = X##_e; \
239 _flo.bits.sign = X##_s; \
241 (val) = _flo.flt; \
242 } while (0)
244 #define _FP_PACK_RAW_2_P(fs, val, X) \
245 do { \
246 union _FP_UNION_##fs *_flo = \
247 (union _FP_UNION_##fs *)(val); \
249 _flo->bits.frac0 = X##_f0; \
250 _flo->bits.frac1 = X##_f1; \
251 _flo->bits.exp = X##_e; \
252 _flo->bits.sign = X##_s; \
253 } while (0)
257 * Multiplication algorithms:
260 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
262 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
263 do { \
264 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
266 doit(_FP_FRAC_WORD_4(R,1), _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0); \
267 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
268 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
269 doit(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), X##_f1, Y##_f1); \
271 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
272 _FP_FRAC_WORD_4(R,1), 0, _b_f1, _b_f0, \
273 _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
274 _FP_FRAC_WORD_4(R,1)); \
275 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
276 _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0, \
277 _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
278 _FP_FRAC_WORD_4(R,1)); \
279 } while (0)
281 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
282 do { \
283 _FP_FRAC_DECL_4(_z); \
285 _FP_MUL_MEAT_DW_2_wide(wfracbits, _z, X, Y, doit); \
287 /* Normalize since we know where the msb of the multiplicands \
288 were (bit B), we know that the msb of the of the product is \
289 at either 2B or 2B-1. */ \
290 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
291 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
292 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
293 } while (0)
295 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
296 Do only 3 multiplications instead of four. This one is for machines
297 where multiplication is much more expensive than subtraction. */
299 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
300 do { \
301 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
302 _FP_W_TYPE _d; \
303 int _c1, _c2; \
305 _b_f0 = X##_f0 + X##_f1; \
306 _c1 = _b_f0 < X##_f0; \
307 _b_f1 = Y##_f0 + Y##_f1; \
308 _c2 = _b_f1 < Y##_f0; \
309 doit(_d, _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0); \
310 doit(_FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1), _b_f0, _b_f1); \
311 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
313 _b_f0 &= -_c2; \
314 _b_f1 &= -_c1; \
315 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
316 _FP_FRAC_WORD_4(R,1), (_c1 & _c2), 0, _d, \
317 0, _FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1)); \
318 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
319 _b_f0); \
320 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
321 _b_f1); \
322 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
323 _FP_FRAC_WORD_4(R,1), \
324 0, _d, _FP_FRAC_WORD_4(R,0)); \
325 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
326 _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0); \
327 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), \
328 _c_f1, _c_f0, \
329 _FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2)); \
330 } while (0)
332 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
333 do { \
334 _FP_FRAC_DECL_4(_z); \
336 _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, _z, X, Y, doit); \
338 /* Normalize since we know where the msb of the multiplicands \
339 were (bit B), we know that the msb of the of the product is \
340 at either 2B or 2B-1. */ \
341 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
342 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
343 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
344 } while (0)
346 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
347 do { \
348 _FP_W_TYPE _x[2], _y[2]; \
349 _x[0] = X##_f0; _x[1] = X##_f1; \
350 _y[0] = Y##_f0; _y[1] = Y##_f1; \
352 mpn_mul_n(R##_f, _x, _y, 2); \
353 } while (0)
355 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
356 do { \
357 _FP_FRAC_DECL_4(_z); \
359 _FP_MUL_MEAT_DW_2_gmp(wfracbits, _z, X, Y); \
361 /* Normalize since we know where the msb of the multiplicands \
362 were (bit B), we know that the msb of the of the product is \
363 at either 2B or 2B-1. */ \
364 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
365 R##_f0 = _z_f[0]; \
366 R##_f1 = _z_f[1]; \
367 } while (0)
369 /* Do at most 120x120=240 bits multiplication using double floating
370 point multiplication. This is useful if floating point
371 multiplication has much bigger throughput than integer multiply.
372 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
373 between 106 and 120 only.
374 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
375 SETFETZ is a macro which will disable all FPU exceptions and set rounding
376 towards zero, RESETFE should optionally reset it back. */
378 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
379 do { \
380 static const double _const[] = { \
381 /* 2^-24 */ 5.9604644775390625e-08, \
382 /* 2^-48 */ 3.5527136788005009e-15, \
383 /* 2^-72 */ 2.1175823681357508e-22, \
384 /* 2^-96 */ 1.2621774483536189e-29, \
385 /* 2^28 */ 2.68435456e+08, \
386 /* 2^4 */ 1.600000e+01, \
387 /* 2^-20 */ 9.5367431640625e-07, \
388 /* 2^-44 */ 5.6843418860808015e-14, \
389 /* 2^-68 */ 3.3881317890172014e-21, \
390 /* 2^-92 */ 2.0194839173657902e-28, \
391 /* 2^-116 */ 1.2037062152420224e-35}; \
392 double _a240, _b240, _c240, _d240, _e240, _f240, \
393 _g240, _h240, _i240, _j240, _k240; \
394 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
395 _p240, _q240, _r240, _s240; \
396 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
398 if (wfracbits < 106 || wfracbits > 120) \
399 abort(); \
401 setfetz; \
403 _e240 = (double)(long)(X##_f0 & 0xffffff); \
404 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
405 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
406 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
407 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
408 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
409 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
410 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
411 _a240 = (double)(long)(X##_f1 >> 32); \
412 _f240 = (double)(long)(Y##_f1 >> 32); \
413 _e240 *= _const[3]; \
414 _j240 *= _const[3]; \
415 _d240 *= _const[2]; \
416 _i240 *= _const[2]; \
417 _c240 *= _const[1]; \
418 _h240 *= _const[1]; \
419 _b240 *= _const[0]; \
420 _g240 *= _const[0]; \
421 _s240.d = _e240*_j240;\
422 _r240.d = _d240*_j240 + _e240*_i240;\
423 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
424 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
425 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
426 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
427 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
428 _l240.d = _a240*_g240 + _b240*_f240; \
429 _k240 = _a240*_f240; \
430 _r240.d += _s240.d; \
431 _q240.d += _r240.d; \
432 _p240.d += _q240.d; \
433 _o240.d += _p240.d; \
434 _n240.d += _o240.d; \
435 _m240.d += _n240.d; \
436 _l240.d += _m240.d; \
437 _k240 += _l240.d; \
438 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
439 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
440 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
441 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
442 _o240.d += _const[7]; \
443 _n240.d += _const[6]; \
444 _m240.d += _const[5]; \
445 _l240.d += _const[4]; \
446 if (_s240.d != 0.0) _y240 = 1; \
447 if (_r240.d != 0.0) _y240 = 1; \
448 if (_q240.d != 0.0) _y240 = 1; \
449 if (_p240.d != 0.0) _y240 = 1; \
450 _t240 = (DItype)_k240; \
451 _u240 = _l240.i; \
452 _v240 = _m240.i; \
453 _w240 = _n240.i; \
454 _x240 = _o240.i; \
455 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
456 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
457 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
458 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
459 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
460 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
461 | _y240; \
462 resetfe; \
463 } while (0)
466 * Division algorithms:
469 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
470 do { \
471 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
472 if (_FP_FRAC_GT_2(X, Y)) \
474 _n_f2 = X##_f1 >> 1; \
475 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
476 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
478 else \
480 R##_e--; \
481 _n_f2 = X##_f1; \
482 _n_f1 = X##_f0; \
483 _n_f0 = 0; \
486 /* Normalize, i.e. make the most significant bit of the \
487 denominator set. */ \
488 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
490 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
491 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
492 _r_f0 = _n_f0; \
493 if (_FP_FRAC_GT_2(_m, _r)) \
495 R##_f1--; \
496 _FP_FRAC_ADD_2(_r, Y, _r); \
497 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
499 R##_f1--; \
500 _FP_FRAC_ADD_2(_r, Y, _r); \
503 _FP_FRAC_DEC_2(_r, _m); \
505 if (_r_f1 == Y##_f1) \
507 /* This is a special case, not an optimization \
508 (_r/Y##_f1 would not fit into UWtype). \
509 As _r is guaranteed to be < Y, R##_f0 can be either \
510 (UWtype)-1 or (UWtype)-2. But as we know what kind \
511 of bits it is (sticky, guard, round), we don't care. \
512 We also don't care what the reminder is, because the \
513 guard bit will be set anyway. -jj */ \
514 R##_f0 = -1; \
516 else \
518 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
519 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
520 _r_f0 = 0; \
521 if (_FP_FRAC_GT_2(_m, _r)) \
523 R##_f0--; \
524 _FP_FRAC_ADD_2(_r, Y, _r); \
525 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
527 R##_f0--; \
528 _FP_FRAC_ADD_2(_r, Y, _r); \
531 if (!_FP_FRAC_EQ_2(_r, _m)) \
532 R##_f0 |= _FP_WORK_STICKY; \
534 } while (0)
537 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
538 do { \
539 _FP_W_TYPE _x[4], _y[2], _z[4]; \
540 _y[0] = Y##_f0; _y[1] = Y##_f1; \
541 _x[0] = _x[3] = 0; \
542 if (_FP_FRAC_GT_2(X, Y)) \
544 R##_e++; \
545 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
546 X##_f1 >> (_FP_W_TYPE_SIZE - \
547 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
548 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
550 else \
552 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
553 X##_f1 >> (_FP_W_TYPE_SIZE - \
554 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
555 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
558 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
559 R##_f1 = _z[1]; \
560 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
561 } while (0)
565 * Square root algorithms:
566 * We have just one right now, maybe Newton approximation
567 * should be added for those machines where division is fast.
570 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
571 do { \
572 while (q) \
574 T##_f1 = S##_f1 + q; \
575 if (T##_f1 <= X##_f1) \
577 S##_f1 = T##_f1 + q; \
578 X##_f1 -= T##_f1; \
579 R##_f1 += q; \
581 _FP_FRAC_SLL_2(X, 1); \
582 q >>= 1; \
584 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
585 while (q != _FP_WORK_ROUND) \
587 T##_f0 = S##_f0 + q; \
588 T##_f1 = S##_f1; \
589 if (T##_f1 < X##_f1 || \
590 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
592 S##_f0 = T##_f0 + q; \
593 S##_f1 += (T##_f0 > S##_f0); \
594 _FP_FRAC_DEC_2(X, T); \
595 R##_f0 += q; \
597 _FP_FRAC_SLL_2(X, 1); \
598 q >>= 1; \
600 if (X##_f0 | X##_f1) \
602 if (S##_f1 < X##_f1 || \
603 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
604 R##_f0 |= _FP_WORK_ROUND; \
605 R##_f0 |= _FP_WORK_STICKY; \
607 } while (0)
611 * Assembly/disassembly for converting to/from integral types.
612 * No shifting or overflow handled here.
615 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
616 (void)((rsize <= _FP_W_TYPE_SIZE) \
617 ? ({ r = X##_f0; }) \
618 : ({ \
619 r = X##_f1; \
620 r <<= _FP_W_TYPE_SIZE; \
621 r += X##_f0; \
624 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
625 do { \
626 X##_f0 = r; \
627 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
628 } while (0)
631 * Convert FP values between word sizes
634 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
636 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
638 #define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S)