Update copyright notices with scripts/update-copyrights
[glibc.git] / soft-fp / op-2.h
blob160990fe4e8fe1586c76e2c00c7bce2f1eec0f38
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X) (X##_f1)
37 #define _FP_FRAC_LOW_2(X) (X##_f0)
38 #define _FP_FRAC_WORD_2(X, w) (X##_f##w)
40 #define _FP_FRAC_SLL_2(X, N) \
41 (void) (((N) < _FP_W_TYPE_SIZE) \
42 ? ({ \
43 if (__builtin_constant_p (N) && (N) == 1) \
44 { \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46 X##_f0 += X##_f0; \
47 } \
48 else \
49 { \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 X##_f0 <<= (N); \
52 } \
53 0; \
54 }) \
55 : ({ \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57 X##_f0 = 0; \
58 }))
61 #define _FP_FRAC_SRL_2(X, N) \
62 (void) (((N) < _FP_W_TYPE_SIZE) \
63 ? ({ \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 X##_f1 >>= (N); \
66 }) \
67 : ({ \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69 X##_f1 = 0; \
70 }))
72 /* Right shift with sticky-lsb. */
73 #define _FP_FRAC_SRST_2(X, S, N, sz) \
74 (void) (((N) < _FP_W_TYPE_SIZE) \
75 ? ({ \
76 S = (__builtin_constant_p (N) && (N) == 1 \
77 ? X##_f0 & 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 X##_f1 >>= (N); \
81 }) \
82 : ({ \
83 S = ((((N) == _FP_W_TYPE_SIZE \
84 ? 0 \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86 | X##_f0) != 0); \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88 X##_f1 = 0; \
89 }))
91 #define _FP_FRAC_SRS_2(X, N, sz) \
92 (void) (((N) < _FP_W_TYPE_SIZE) \
93 ? ({ \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95 | (__builtin_constant_p (N) && (N) == 1 \
96 ? X##_f0 & 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 X##_f1 >>= (N); \
99 }) \
100 : ({ \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
102 | ((((N) == _FP_W_TYPE_SIZE \
103 ? 0 \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105 | X##_f0) != 0)); \
106 X##_f1 = 0; \
109 #define _FP_FRAC_ADDI_2(X, I) \
110 __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
112 #define _FP_FRAC_ADD_2(R, X, Y) \
113 __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
115 #define _FP_FRAC_SUB_2(R, X, Y) \
116 __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118 #define _FP_FRAC_DEC_2(X, Y) \
119 __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
121 #define _FP_FRAC_CLZ_2(R, X) \
122 do \
124 if (X##_f1) \
125 __FP_CLZ (R, X##_f1); \
126 else \
128 __FP_CLZ (R, X##_f0); \
129 R += _FP_W_TYPE_SIZE; \
132 while (0)
134 /* Predicates */
135 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
140 (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y) \
143 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y) \
145 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
147 #define _FP_ZEROFRAC_2 0, 0
148 #define _FP_MINFRAC_2 0, 1
149 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
152 * Internals
155 #define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
157 #define __FP_CLZ_2(R, xh, xl) \
158 do \
160 if (xh) \
161 __FP_CLZ (R, xh); \
162 else \
164 __FP_CLZ (R, xl); \
165 R += _FP_W_TYPE_SIZE; \
168 while (0)
170 #if 0
172 # ifndef __FP_FRAC_ADDI_2
173 # define __FP_FRAC_ADDI_2(xh, xl, i) \
174 (xh += ((xl += i) < i))
175 # endif
176 # ifndef __FP_FRAC_ADD_2
177 # define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
178 (rh = xh + yh + ((rl = xl + yl) < xl))
179 # endif
180 # ifndef __FP_FRAC_SUB_2
181 # define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
182 (rh = xh - yh - ((rl = xl - yl) > xl))
183 # endif
184 # ifndef __FP_FRAC_DEC_2
185 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
186 do \
188 UWtype _t = xl; \
189 xh -= yh + ((xl -= yl) > _t); \
191 while (0)
192 # endif
194 #else
196 # undef __FP_FRAC_ADDI_2
197 # define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
198 # undef __FP_FRAC_ADD_2
199 # define __FP_FRAC_ADD_2 add_ssaaaa
200 # undef __FP_FRAC_SUB_2
201 # define __FP_FRAC_SUB_2 sub_ddmmss
202 # undef __FP_FRAC_DEC_2
203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
204 sub_ddmmss (xh, xl, xh, xl, yh, yl)
206 #endif
209 * Unpack the raw bits of a native fp value. Do not classify or
210 * normalize the data.
213 #define _FP_UNPACK_RAW_2(fs, X, val) \
214 do \
216 union _FP_UNION_##fs _flo; \
217 _flo.flt = (val); \
219 X##_f0 = _flo.bits.frac0; \
220 X##_f1 = _flo.bits.frac1; \
221 X##_e = _flo.bits.exp; \
222 X##_s = _flo.bits.sign; \
224 while (0)
226 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
227 do \
229 union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
231 X##_f0 = _flo->bits.frac0; \
232 X##_f1 = _flo->bits.frac1; \
233 X##_e = _flo->bits.exp; \
234 X##_s = _flo->bits.sign; \
236 while (0)
240 * Repack the raw bits of a native fp value.
243 #define _FP_PACK_RAW_2(fs, val, X) \
244 do \
246 union _FP_UNION_##fs _flo; \
248 _flo.bits.frac0 = X##_f0; \
249 _flo.bits.frac1 = X##_f1; \
250 _flo.bits.exp = X##_e; \
251 _flo.bits.sign = X##_s; \
253 (val) = _flo.flt; \
255 while (0)
257 #define _FP_PACK_RAW_2_P(fs, val, X) \
258 do \
260 union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
262 _flo->bits.frac0 = X##_f0; \
263 _flo->bits.frac1 = X##_f1; \
264 _flo->bits.exp = X##_e; \
265 _flo->bits.sign = X##_s; \
267 while (0)
271 * Multiplication algorithms:
274 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
276 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
277 do \
279 _FP_FRAC_DECL_2 (_b); \
280 _FP_FRAC_DECL_2 (_c); \
282 doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
283 doit (_b_f1, _b_f0, X##_f0, Y##_f1); \
284 doit (_c_f1, _c_f0, X##_f1, Y##_f0); \
285 doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \
287 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
288 _FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0, \
289 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
290 _FP_FRAC_WORD_4 (R, 1)); \
291 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
292 _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0, \
293 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
294 _FP_FRAC_WORD_4 (R, 1)); \
296 while (0)
298 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
299 do \
301 _FP_FRAC_DECL_4 (_z); \
303 _FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit); \
305 /* Normalize since we know where the msb of the multiplicands \
306 were (bit B), we know that the msb of the of the product is \
307 at either 2B or 2B-1. */ \
308 _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
309 R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \
310 R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \
312 while (0)
314 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
315 Do only 3 multiplications instead of four. This one is for machines
316 where multiplication is much more expensive than subtraction. */
318 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
319 do \
321 _FP_FRAC_DECL_2 (_b); \
322 _FP_FRAC_DECL_2 (_c); \
323 _FP_W_TYPE _d; \
324 int _c1, _c2; \
326 _b_f0 = X##_f0 + X##_f1; \
327 _c1 = _b_f0 < X##_f0; \
328 _b_f1 = Y##_f0 + Y##_f1; \
329 _c2 = _b_f1 < Y##_f0; \
330 doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
331 doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \
332 doit (_c_f1, _c_f0, X##_f1, Y##_f1); \
334 _b_f0 &= -_c2; \
335 _b_f1 &= -_c1; \
336 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
337 _FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d, \
338 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
339 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
340 _b_f0); \
341 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
342 _b_f1); \
343 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
344 _FP_FRAC_WORD_4 (R, 1), \
345 0, _d, _FP_FRAC_WORD_4 (R, 0)); \
346 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
347 _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0); \
348 __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
349 _c_f1, _c_f0, \
350 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
352 while (0)
354 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
355 do \
357 _FP_FRAC_DECL_4 (_z); \
359 _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit); \
361 /* Normalize since we know where the msb of the multiplicands \
362 were (bit B), we know that the msb of the of the product is \
363 at either 2B or 2B-1. */ \
364 _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
365 R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \
366 R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \
368 while (0)
370 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
371 do \
373 _FP_W_TYPE _x[2], _y[2]; \
374 _x[0] = X##_f0; \
375 _x[1] = X##_f1; \
376 _y[0] = Y##_f0; \
377 _y[1] = Y##_f1; \
379 mpn_mul_n (R##_f, _x, _y, 2); \
381 while (0)
383 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
384 do \
386 _FP_FRAC_DECL_4 (_z); \
388 _FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y); \
390 /* Normalize since we know where the msb of the multiplicands \
391 were (bit B), we know that the msb of the of the product is \
392 at either 2B or 2B-1. */ \
393 _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \
394 R##_f0 = _z_f[0]; \
395 R##_f1 = _z_f[1]; \
397 while (0)
399 /* Do at most 120x120=240 bits multiplication using double floating
400 point multiplication. This is useful if floating point
401 multiplication has much bigger throughput than integer multiply.
402 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
403 between 106 and 120 only.
404 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
405 SETFETZ is a macro which will disable all FPU exceptions and set rounding
406 towards zero, RESETFE should optionally reset it back. */
408 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
409 do \
411 static const double _const[] = \
413 /* 2^-24 */ 5.9604644775390625e-08, \
414 /* 2^-48 */ 3.5527136788005009e-15, \
415 /* 2^-72 */ 2.1175823681357508e-22, \
416 /* 2^-96 */ 1.2621774483536189e-29, \
417 /* 2^28 */ 2.68435456e+08, \
418 /* 2^4 */ 1.600000e+01, \
419 /* 2^-20 */ 9.5367431640625e-07, \
420 /* 2^-44 */ 5.6843418860808015e-14, \
421 /* 2^-68 */ 3.3881317890172014e-21, \
422 /* 2^-92 */ 2.0194839173657902e-28, \
423 /* 2^-116 */ 1.2037062152420224e-35 \
424 }; \
425 double _a240, _b240, _c240, _d240, _e240, _f240, \
426 _g240, _h240, _i240, _j240, _k240; \
427 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
428 _p240, _q240, _r240, _s240; \
429 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
431 if (wfracbits < 106 || wfracbits > 120) \
432 abort (); \
434 setfetz; \
436 _e240 = (double) (long) (X##_f0 & 0xffffff); \
437 _j240 = (double) (long) (Y##_f0 & 0xffffff); \
438 _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
439 _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
440 _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
441 _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
442 _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
443 _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
444 _a240 = (double) (long) (X##_f1 >> 32); \
445 _f240 = (double) (long) (Y##_f1 >> 32); \
446 _e240 *= _const[3]; \
447 _j240 *= _const[3]; \
448 _d240 *= _const[2]; \
449 _i240 *= _const[2]; \
450 _c240 *= _const[1]; \
451 _h240 *= _const[1]; \
452 _b240 *= _const[0]; \
453 _g240 *= _const[0]; \
454 _s240.d = _e240*_j240; \
455 _r240.d = _d240*_j240 + _e240*_i240; \
456 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
457 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
458 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
459 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
460 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
461 _l240.d = _a240*_g240 + _b240*_f240; \
462 _k240 = _a240*_f240; \
463 _r240.d += _s240.d; \
464 _q240.d += _r240.d; \
465 _p240.d += _q240.d; \
466 _o240.d += _p240.d; \
467 _n240.d += _o240.d; \
468 _m240.d += _n240.d; \
469 _l240.d += _m240.d; \
470 _k240 += _l240.d; \
471 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
472 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
473 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
474 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
475 _o240.d += _const[7]; \
476 _n240.d += _const[6]; \
477 _m240.d += _const[5]; \
478 _l240.d += _const[4]; \
479 if (_s240.d != 0.0) \
480 _y240 = 1; \
481 if (_r240.d != 0.0) \
482 _y240 = 1; \
483 if (_q240.d != 0.0) \
484 _y240 = 1; \
485 if (_p240.d != 0.0) \
486 _y240 = 1; \
487 _t240 = (DItype) _k240; \
488 _u240 = _l240.i; \
489 _v240 = _m240.i; \
490 _w240 = _n240.i; \
491 _x240 = _o240.i; \
492 R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
493 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
494 R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
495 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
496 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
497 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
498 | _y240); \
499 resetfe; \
501 while (0)
504 * Division algorithms:
507 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
508 do \
510 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
511 if (_FP_FRAC_GE_2 (X, Y)) \
513 _n_f2 = X##_f1 >> 1; \
514 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
515 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
517 else \
519 R##_e--; \
520 _n_f2 = X##_f1; \
521 _n_f1 = X##_f0; \
522 _n_f0 = 0; \
525 /* Normalize, i.e. make the most significant bit of the \
526 denominator set. */ \
527 _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
529 udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
530 umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0); \
531 _r_f0 = _n_f0; \
532 if (_FP_FRAC_GT_2 (_m, _r)) \
534 R##_f1--; \
535 _FP_FRAC_ADD_2 (_r, Y, _r); \
536 if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \
538 R##_f1--; \
539 _FP_FRAC_ADD_2 (_r, Y, _r); \
542 _FP_FRAC_DEC_2 (_r, _m); \
544 if (_r_f1 == Y##_f1) \
546 /* This is a special case, not an optimization \
547 (_r/Y##_f1 would not fit into UWtype). \
548 As _r is guaranteed to be < Y, R##_f0 can be either \
549 (UWtype)-1 or (UWtype)-2. But as we know what kind \
550 of bits it is (sticky, guard, round), we don't care. \
551 We also don't care what the reminder is, because the \
552 guard bit will be set anyway. -jj */ \
553 R##_f0 = -1; \
555 else \
557 udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
558 umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0); \
559 _r_f0 = 0; \
560 if (_FP_FRAC_GT_2 (_m, _r)) \
562 R##_f0--; \
563 _FP_FRAC_ADD_2 (_r, Y, _r); \
564 if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \
566 R##_f0--; \
567 _FP_FRAC_ADD_2 (_r, Y, _r); \
570 if (!_FP_FRAC_EQ_2 (_r, _m)) \
571 R##_f0 |= _FP_WORK_STICKY; \
574 while (0)
578 * Square root algorithms:
579 * We have just one right now, maybe Newton approximation
580 * should be added for those machines where division is fast.
583 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
584 do \
586 while (q) \
588 T##_f1 = S##_f1 + q; \
589 if (T##_f1 <= X##_f1) \
591 S##_f1 = T##_f1 + q; \
592 X##_f1 -= T##_f1; \
593 R##_f1 += q; \
595 _FP_FRAC_SLL_2 (X, 1); \
596 q >>= 1; \
598 q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
599 while (q != _FP_WORK_ROUND) \
601 T##_f0 = S##_f0 + q; \
602 T##_f1 = S##_f1; \
603 if (T##_f1 < X##_f1 \
604 || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
606 S##_f0 = T##_f0 + q; \
607 S##_f1 += (T##_f0 > S##_f0); \
608 _FP_FRAC_DEC_2 (X, T); \
609 R##_f0 += q; \
611 _FP_FRAC_SLL_2 (X, 1); \
612 q >>= 1; \
614 if (X##_f0 | X##_f1) \
616 if (S##_f1 < X##_f1 \
617 || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
618 R##_f0 |= _FP_WORK_ROUND; \
619 R##_f0 |= _FP_WORK_STICKY; \
622 while (0)
626 * Assembly/disassembly for converting to/from integral types.
627 * No shifting or overflow handled here.
630 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
631 (void) ((rsize <= _FP_W_TYPE_SIZE) \
632 ? ({ r = X##_f0; }) \
633 : ({ \
634 r = X##_f1; \
635 r <<= _FP_W_TYPE_SIZE; \
636 r += X##_f0; \
639 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
640 do \
642 X##_f0 = r; \
643 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
645 while (0)
648 * Convert FP values between word sizes
651 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
653 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
655 #define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)