1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997-2022 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 In addition to the permissions in the GNU Lesser General Public
12 License, the Free Software Foundation gives you unlimited
13 permission to link the compiled version of this file into
14 combinations with other programs, and to distribute those
15 combinations without any restriction coming from the use of this
16 file. (The Lesser General Public License restrictions do apply in
17 other respects; for example, they cover modification of the file,
18 and distribution when not linked into a combine executable.)
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, see
27 <https://www.gnu.org/licenses/>. */
29 #ifndef SOFT_FP_OP_2_H
30 #define SOFT_FP_OP_2_H 1
32 #define _FP_FRAC_DECL_2(X) \
33 _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
34 #define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X) (X##_f1)
37 #define _FP_FRAC_LOW_2(X) (X##_f0)
38 #define _FP_FRAC_WORD_2(X, w) (X##_f##w)
40 #define _FP_FRAC_SLL_2(X, N) \
41 (void) (((N) < _FP_W_TYPE_SIZE) \
43 if (__builtin_constant_p (N) && (N) == 1) \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
61 #define _FP_FRAC_SRL_2(X, N) \
62 (void) (((N) < _FP_W_TYPE_SIZE) \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
72 /* Right shift with sticky-lsb. */
73 #define _FP_FRAC_SRST_2(X, S, N, sz) \
74 (void) (((N) < _FP_W_TYPE_SIZE) \
76 S = (__builtin_constant_p (N) && (N) == 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
83 S = ((((N) == _FP_W_TYPE_SIZE \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
91 #define _FP_FRAC_SRS_2(X, N, sz) \
92 (void) (((N) < _FP_W_TYPE_SIZE) \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95 | (__builtin_constant_p (N) && (N) == 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
102 | ((((N) == _FP_W_TYPE_SIZE \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
109 #define _FP_FRAC_ADDI_2(X, I) \
110 __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
112 #define _FP_FRAC_ADD_2(R, X, Y) \
113 __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
115 #define _FP_FRAC_SUB_2(R, X, Y) \
116 __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118 #define _FP_FRAC_DEC_2(X, Y) \
119 __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
121 #define _FP_FRAC_CLZ_2(R, X) \
125 __FP_CLZ ((R), X##_f1); \
128 __FP_CLZ ((R), X##_f0); \
129 (R) += _FP_W_TYPE_SIZE; \
135 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
140 (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y) \
143 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y) \
145 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
147 #define _FP_ZEROFRAC_2 0, 0
148 #define _FP_MINFRAC_2 0, 1
149 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
153 #define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
155 #define __FP_CLZ_2(R, xh, xl) \
159 __FP_CLZ ((R), xh); \
162 __FP_CLZ ((R), xl); \
163 (R) += _FP_W_TYPE_SIZE; \
170 # ifndef __FP_FRAC_ADDI_2
171 # define __FP_FRAC_ADDI_2(xh, xl, i) \
172 (xh += ((xl += i) < i))
174 # ifndef __FP_FRAC_ADD_2
175 # define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
176 (rh = xh + yh + ((rl = xl + yl) < xl))
178 # ifndef __FP_FRAC_SUB_2
179 # define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
180 (rh = xh - yh - ((rl = xl - yl) > xl))
182 # ifndef __FP_FRAC_DEC_2
183 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
186 UWtype __FP_FRAC_DEC_2_t = xl; \
187 xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
194 # undef __FP_FRAC_ADDI_2
195 # define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
196 # undef __FP_FRAC_ADD_2
197 # define __FP_FRAC_ADD_2 add_ssaaaa
198 # undef __FP_FRAC_SUB_2
199 # define __FP_FRAC_SUB_2 sub_ddmmss
200 # undef __FP_FRAC_DEC_2
201 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
202 sub_ddmmss (xh, xl, xh, xl, yh, yl)
206 /* Unpack the raw bits of a native fp value. Do not classify or
207 normalize the data. */
209 #define _FP_UNPACK_RAW_2(fs, X, val) \
212 union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
213 _FP_UNPACK_RAW_2_flo.flt = (val); \
215 X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
216 X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
217 X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
218 X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
222 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
225 union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
226 = (union _FP_UNION_##fs *) (val); \
228 X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
229 X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
230 X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
231 X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
236 /* Repack the raw bits of a native fp value. */
238 #define _FP_PACK_RAW_2(fs, val, X) \
241 union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
243 _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
244 _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
245 _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
246 _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
248 (val) = _FP_PACK_RAW_2_flo.flt; \
252 #define _FP_PACK_RAW_2_P(fs, val, X) \
255 union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
256 = (union _FP_UNION_##fs *) (val); \
258 _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
259 _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
260 _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
261 _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
266 /* Multiplication algorithms: */
268 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
270 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
273 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
274 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
276 doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
278 doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
280 doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
282 doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
285 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
286 _FP_FRAC_WORD_4 (R, 1), 0, \
287 _FP_MUL_MEAT_DW_2_wide_b_f1, \
288 _FP_MUL_MEAT_DW_2_wide_b_f0, \
289 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
290 _FP_FRAC_WORD_4 (R, 1)); \
291 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
292 _FP_FRAC_WORD_4 (R, 1), 0, \
293 _FP_MUL_MEAT_DW_2_wide_c_f1, \
294 _FP_MUL_MEAT_DW_2_wide_c_f0, \
295 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
296 _FP_FRAC_WORD_4 (R, 1)); \
300 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
303 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
305 _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
308 /* Normalize since we know where the msb of the multiplicands \
309 were (bit B), we know that the msb of the of the product is \
310 at either 2B or 2B-1. */ \
311 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
313 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
314 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
318 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
319 Do only 3 multiplications instead of four. This one is for machines
320 where multiplication is much more expensive than subtraction. */
322 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
325 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
326 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
327 _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
328 int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
329 int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
331 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
332 _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
333 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
334 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
335 _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
336 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
337 doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
339 doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
340 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
341 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
342 doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
343 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
345 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
346 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
347 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
348 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
349 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
350 _FP_FRAC_WORD_4 (R, 1), \
351 (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
352 & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
353 _FP_MUL_MEAT_DW_2_wide_3mul_d, \
354 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
355 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
356 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
357 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
358 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
359 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
360 _FP_FRAC_WORD_4 (R, 1), \
361 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
362 _FP_FRAC_WORD_4 (R, 0)); \
363 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
364 _FP_FRAC_WORD_4 (R, 1), 0, \
365 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
366 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
367 __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
368 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
369 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
370 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
374 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
377 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
379 _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
380 _FP_MUL_MEAT_2_wide_3mul_z, \
383 /* Normalize since we know where the msb of the multiplicands \
384 were (bit B), we know that the msb of the of the product is \
385 at either 2B or 2B-1. */ \
386 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
387 (wfracbits)-1, 2*(wfracbits)); \
388 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
389 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
393 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
396 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
397 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
398 _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
399 _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
400 _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
401 _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
403 mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
404 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
408 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
411 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
413 _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
415 /* Normalize since we know where the msb of the multiplicands \
416 were (bit B), we know that the msb of the of the product is \
417 at either 2B or 2B-1. */ \
418 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
420 R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
421 R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
425 /* Do at most 120x120=240 bits multiplication using double floating
426 point multiplication. This is useful if floating point
427 multiplication has much bigger throughput than integer multiply.
428 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
429 between 106 and 120 only.
430 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
431 SETFETZ is a macro which will disable all FPU exceptions and set rounding
432 towards zero, RESETFE should optionally reset it back. */
434 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
437 static const double _const[] = \
439 /* 2^-24 */ 5.9604644775390625e-08, \
440 /* 2^-48 */ 3.5527136788005009e-15, \
441 /* 2^-72 */ 2.1175823681357508e-22, \
442 /* 2^-96 */ 1.2621774483536189e-29, \
443 /* 2^28 */ 2.68435456e+08, \
444 /* 2^4 */ 1.600000e+01, \
445 /* 2^-20 */ 9.5367431640625e-07, \
446 /* 2^-44 */ 5.6843418860808015e-14, \
447 /* 2^-68 */ 3.3881317890172014e-21, \
448 /* 2^-92 */ 2.0194839173657902e-28, \
449 /* 2^-116 */ 1.2037062152420224e-35 \
451 double _a240, _b240, _c240, _d240, _e240, _f240, \
452 _g240, _h240, _i240, _j240, _k240; \
453 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
454 _p240, _q240, _r240, _s240; \
455 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
457 _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
458 "wfracbits out of range"); \
462 _e240 = (double) (long) (X##_f0 & 0xffffff); \
463 _j240 = (double) (long) (Y##_f0 & 0xffffff); \
464 _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
465 _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
466 _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
467 _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
468 _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
469 _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
470 _a240 = (double) (long) (X##_f1 >> 32); \
471 _f240 = (double) (long) (Y##_f1 >> 32); \
472 _e240 *= _const[3]; \
473 _j240 *= _const[3]; \
474 _d240 *= _const[2]; \
475 _i240 *= _const[2]; \
476 _c240 *= _const[1]; \
477 _h240 *= _const[1]; \
478 _b240 *= _const[0]; \
479 _g240 *= _const[0]; \
480 _s240.d = _e240*_j240; \
481 _r240.d = _d240*_j240 + _e240*_i240; \
482 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
483 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
484 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
485 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
486 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
487 _l240.d = _a240*_g240 + _b240*_f240; \
488 _k240 = _a240*_f240; \
489 _r240.d += _s240.d; \
490 _q240.d += _r240.d; \
491 _p240.d += _q240.d; \
492 _o240.d += _p240.d; \
493 _n240.d += _o240.d; \
494 _m240.d += _n240.d; \
495 _l240.d += _m240.d; \
497 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
498 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
499 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
500 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
501 _o240.d += _const[7]; \
502 _n240.d += _const[6]; \
503 _m240.d += _const[5]; \
504 _l240.d += _const[4]; \
505 if (_s240.d != 0.0) \
507 if (_r240.d != 0.0) \
509 if (_q240.d != 0.0) \
511 if (_p240.d != 0.0) \
513 _t240 = (DItype) _k240; \
518 R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
519 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
520 R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
521 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
522 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
523 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
529 /* Division algorithms: */
531 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
534 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
535 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
536 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
537 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
538 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
539 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
540 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
541 if (_FP_FRAC_GE_2 (X, Y)) \
543 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
544 _FP_DIV_MEAT_2_udiv_n_f1 \
545 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
546 _FP_DIV_MEAT_2_udiv_n_f0 \
547 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
552 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
553 _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
554 _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
557 /* Normalize, i.e. make the most significant bit of the \
558 denominator set. */ \
559 _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
561 udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
562 _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
564 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
566 _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
567 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
570 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
571 _FP_DIV_MEAT_2_udiv_r); \
572 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
573 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
574 _FP_DIV_MEAT_2_udiv_r)) \
577 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
578 _FP_DIV_MEAT_2_udiv_r); \
581 _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
583 if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
585 /* This is a special case, not an optimization \
586 (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
587 As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
588 R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
589 know what kind of bits it is (sticky, guard, round), \
590 we don't care. We also don't care what the reminder is, \
591 because the guard bit will be set anyway. -jj */ \
596 udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
597 _FP_DIV_MEAT_2_udiv_r_f1, \
598 _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
599 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
600 _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
601 _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
602 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
603 _FP_DIV_MEAT_2_udiv_r)) \
606 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
607 _FP_DIV_MEAT_2_udiv_r); \
608 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
609 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
610 _FP_DIV_MEAT_2_udiv_r)) \
613 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
614 _FP_DIV_MEAT_2_udiv_r); \
617 if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
618 _FP_DIV_MEAT_2_udiv_m)) \
619 R##_f0 |= _FP_WORK_STICKY; \
625 /* Square root algorithms:
626 We have just one right now, maybe Newton approximation
627 should be added for those machines where division is fast. */
629 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
634 T##_f1 = S##_f1 + (q); \
635 if (T##_f1 <= X##_f1) \
637 S##_f1 = T##_f1 + (q); \
641 _FP_FRAC_SLL_2 (X, 1); \
644 (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
645 while ((q) != _FP_WORK_ROUND) \
647 T##_f0 = S##_f0 + (q); \
649 if (T##_f1 < X##_f1 \
650 || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
652 S##_f0 = T##_f0 + (q); \
653 S##_f1 += (T##_f0 > S##_f0); \
654 _FP_FRAC_DEC_2 (X, T); \
657 _FP_FRAC_SLL_2 (X, 1); \
660 if (X##_f0 | X##_f1) \
662 if (S##_f1 < X##_f1 \
663 || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
664 R##_f0 |= _FP_WORK_ROUND; \
665 R##_f0 |= _FP_WORK_STICKY; \
671 /* Assembly/disassembly for converting to/from integral types.
672 No shifting or overflow handled here. */
674 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
675 (void) (((rsize) <= _FP_W_TYPE_SIZE) \
676 ? ({ (r) = X##_f0; }) \
679 (r) <<= _FP_W_TYPE_SIZE; \
683 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
687 X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
689 : (r) >> _FP_W_TYPE_SIZE); \
693 /* Convert FP values between word sizes. */
695 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
697 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
699 #define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)
701 #endif /* !SOFT_FP_OP_2_H */