soft-fp: Add FP_DENORM_ZERO.
[glibc.git] / soft-fp / op-2.h
blob6a6f93876b0a23866629dc69ebbc5e71731999b8
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X) (X##_f1)
37 #define _FP_FRAC_LOW_2(X) (X##_f0)
38 #define _FP_FRAC_WORD_2(X, w) (X##_f##w)
40 #define _FP_FRAC_SLL_2(X, N) \
41 (void) (((N) < _FP_W_TYPE_SIZE) \
42 ? ({ \
43 if (__builtin_constant_p (N) && (N) == 1) \
44 { \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46 X##_f0 += X##_f0; \
47 } \
48 else \
49 { \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 X##_f0 <<= (N); \
52 } \
53 0; \
54 }) \
55 : ({ \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57 X##_f0 = 0; \
58 }))
61 #define _FP_FRAC_SRL_2(X, N) \
62 (void) (((N) < _FP_W_TYPE_SIZE) \
63 ? ({ \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 X##_f1 >>= (N); \
66 }) \
67 : ({ \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69 X##_f1 = 0; \
70 }))
72 /* Right shift with sticky-lsb. */
73 #define _FP_FRAC_SRST_2(X, S, N, sz) \
74 (void) (((N) < _FP_W_TYPE_SIZE) \
75 ? ({ \
76 S = (__builtin_constant_p (N) && (N) == 1 \
77 ? X##_f0 & 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 X##_f1 >>= (N); \
81 }) \
82 : ({ \
83 S = ((((N) == _FP_W_TYPE_SIZE \
84 ? 0 \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86 | X##_f0) != 0); \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88 X##_f1 = 0; \
89 }))
91 #define _FP_FRAC_SRS_2(X, N, sz) \
92 (void) (((N) < _FP_W_TYPE_SIZE) \
93 ? ({ \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95 | (__builtin_constant_p (N) && (N) == 1 \
96 ? X##_f0 & 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 X##_f1 >>= (N); \
99 }) \
100 : ({ \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
102 | ((((N) == _FP_W_TYPE_SIZE \
103 ? 0 \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105 | X##_f0) != 0)); \
106 X##_f1 = 0; \
109 #define _FP_FRAC_ADDI_2(X, I) \
110 __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
112 #define _FP_FRAC_ADD_2(R, X, Y) \
113 __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
115 #define _FP_FRAC_SUB_2(R, X, Y) \
116 __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118 #define _FP_FRAC_DEC_2(X, Y) \
119 __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
121 #define _FP_FRAC_CLZ_2(R, X) \
122 do \
124 if (X##_f1) \
125 __FP_CLZ (R, X##_f1); \
126 else \
128 __FP_CLZ (R, X##_f0); \
129 R += _FP_W_TYPE_SIZE; \
132 while (0)
134 /* Predicates. */
135 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
140 (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y) \
143 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y) \
145 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
147 #define _FP_ZEROFRAC_2 0, 0
148 #define _FP_MINFRAC_2 0, 1
149 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
151 /* Internals. */
153 #define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
155 #define __FP_CLZ_2(R, xh, xl) \
156 do \
158 if (xh) \
159 __FP_CLZ (R, xh); \
160 else \
162 __FP_CLZ (R, xl); \
163 R += _FP_W_TYPE_SIZE; \
166 while (0)
168 #if 0
170 # ifndef __FP_FRAC_ADDI_2
171 # define __FP_FRAC_ADDI_2(xh, xl, i) \
172 (xh += ((xl += i) < i))
173 # endif
174 # ifndef __FP_FRAC_ADD_2
175 # define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
176 (rh = xh + yh + ((rl = xl + yl) < xl))
177 # endif
178 # ifndef __FP_FRAC_SUB_2
179 # define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
180 (rh = xh - yh - ((rl = xl - yl) > xl))
181 # endif
182 # ifndef __FP_FRAC_DEC_2
183 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
184 do \
186 UWtype __FP_FRAC_DEC_2_t = xl; \
187 xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
189 while (0)
190 # endif
192 #else
194 # undef __FP_FRAC_ADDI_2
195 # define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
196 # undef __FP_FRAC_ADD_2
197 # define __FP_FRAC_ADD_2 add_ssaaaa
198 # undef __FP_FRAC_SUB_2
199 # define __FP_FRAC_SUB_2 sub_ddmmss
200 # undef __FP_FRAC_DEC_2
201 # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
202 sub_ddmmss (xh, xl, xh, xl, yh, yl)
204 #endif
206 /* Unpack the raw bits of a native fp value. Do not classify or
207 normalize the data. */
209 #define _FP_UNPACK_RAW_2(fs, X, val) \
210 do \
212 union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
213 _FP_UNPACK_RAW_2_flo.flt = (val); \
215 X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
216 X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
217 X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
218 X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
220 while (0)
222 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
223 do \
225 union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
226 = (union _FP_UNION_##fs *) (val); \
228 X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
229 X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
230 X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
231 X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
233 while (0)
236 /* Repack the raw bits of a native fp value. */
238 #define _FP_PACK_RAW_2(fs, val, X) \
239 do \
241 union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
243 _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
244 _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
245 _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
246 _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
248 (val) = _FP_PACK_RAW_2_flo.flt; \
250 while (0)
252 #define _FP_PACK_RAW_2_P(fs, val, X) \
253 do \
255 union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
256 = (union _FP_UNION_##fs *) (val); \
258 _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
259 _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
260 _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
261 _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
263 while (0)
266 /* Multiplication algorithms: */
268 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
270 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
271 do \
273 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
274 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
276 doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
277 X##_f0, Y##_f0); \
278 doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
279 X##_f0, Y##_f1); \
280 doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
281 X##_f1, Y##_f0); \
282 doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
283 X##_f1, Y##_f1); \
285 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
286 _FP_FRAC_WORD_4 (R, 1), 0, \
287 _FP_MUL_MEAT_DW_2_wide_b_f1, \
288 _FP_MUL_MEAT_DW_2_wide_b_f0, \
289 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
290 _FP_FRAC_WORD_4 (R, 1)); \
291 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
292 _FP_FRAC_WORD_4 (R, 1), 0, \
293 _FP_MUL_MEAT_DW_2_wide_c_f1, \
294 _FP_MUL_MEAT_DW_2_wide_c_f0, \
295 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
296 _FP_FRAC_WORD_4 (R, 1)); \
298 while (0)
300 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
301 do \
303 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
305 _FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z, \
306 X, Y, doit); \
308 /* Normalize since we know where the msb of the multiplicands \
309 were (bit B), we know that the msb of the of the product is \
310 at either 2B or 2B-1. */ \
311 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \
312 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
313 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
315 while (0)
317 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
318 Do only 3 multiplications instead of four. This one is for machines
319 where multiplication is much more expensive than subtraction. */
321 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
322 do \
324 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
325 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
326 _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
327 int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
328 int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
330 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
331 _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
332 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
333 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
334 _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
335 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
336 doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
337 X##_f0, Y##_f0); \
338 doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
339 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
340 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
341 doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
342 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
344 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
345 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
346 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
347 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
348 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
349 _FP_FRAC_WORD_4 (R, 1), \
350 (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
351 & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
352 _FP_MUL_MEAT_DW_2_wide_3mul_d, \
353 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
354 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
355 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
356 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
357 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
358 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
359 _FP_FRAC_WORD_4 (R, 1), \
360 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
361 _FP_FRAC_WORD_4 (R, 0)); \
362 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
363 _FP_FRAC_WORD_4 (R, 1), 0, \
364 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
365 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
366 __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
367 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
368 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
369 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
371 while (0)
373 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
374 do \
376 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
378 _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, \
379 _FP_MUL_MEAT_2_wide_3mul_z, \
380 X, Y, doit); \
382 /* Normalize since we know where the msb of the multiplicands \
383 were (bit B), we know that the msb of the of the product is \
384 at either 2B or 2B-1. */ \
385 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
386 wfracbits-1, 2*wfracbits); \
387 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
388 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
390 while (0)
392 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
393 do \
395 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
396 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
397 _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
398 _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
399 _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
400 _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
402 mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
403 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
405 while (0)
407 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
408 do \
410 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
412 _FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y); \
414 /* Normalize since we know where the msb of the multiplicands \
415 were (bit B), we know that the msb of the of the product is \
416 at either 2B or 2B-1. */ \
417 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits); \
418 R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
419 R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
421 while (0)
423 /* Do at most 120x120=240 bits multiplication using double floating
424 point multiplication. This is useful if floating point
425 multiplication has much bigger throughput than integer multiply.
426 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
427 between 106 and 120 only.
428 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
429 SETFETZ is a macro which will disable all FPU exceptions and set rounding
430 towards zero, RESETFE should optionally reset it back. */
432 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
433 do \
435 static const double _const[] = \
437 /* 2^-24 */ 5.9604644775390625e-08, \
438 /* 2^-48 */ 3.5527136788005009e-15, \
439 /* 2^-72 */ 2.1175823681357508e-22, \
440 /* 2^-96 */ 1.2621774483536189e-29, \
441 /* 2^28 */ 2.68435456e+08, \
442 /* 2^4 */ 1.600000e+01, \
443 /* 2^-20 */ 9.5367431640625e-07, \
444 /* 2^-44 */ 5.6843418860808015e-14, \
445 /* 2^-68 */ 3.3881317890172014e-21, \
446 /* 2^-92 */ 2.0194839173657902e-28, \
447 /* 2^-116 */ 1.2037062152420224e-35 \
448 }; \
449 double _a240, _b240, _c240, _d240, _e240, _f240, \
450 _g240, _h240, _i240, _j240, _k240; \
451 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
452 _p240, _q240, _r240, _s240; \
453 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
455 if (wfracbits < 106 || wfracbits > 120) \
456 abort (); \
458 setfetz; \
460 _e240 = (double) (long) (X##_f0 & 0xffffff); \
461 _j240 = (double) (long) (Y##_f0 & 0xffffff); \
462 _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
463 _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
464 _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
465 _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
466 _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
467 _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
468 _a240 = (double) (long) (X##_f1 >> 32); \
469 _f240 = (double) (long) (Y##_f1 >> 32); \
470 _e240 *= _const[3]; \
471 _j240 *= _const[3]; \
472 _d240 *= _const[2]; \
473 _i240 *= _const[2]; \
474 _c240 *= _const[1]; \
475 _h240 *= _const[1]; \
476 _b240 *= _const[0]; \
477 _g240 *= _const[0]; \
478 _s240.d = _e240*_j240; \
479 _r240.d = _d240*_j240 + _e240*_i240; \
480 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
481 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
482 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
483 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
484 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
485 _l240.d = _a240*_g240 + _b240*_f240; \
486 _k240 = _a240*_f240; \
487 _r240.d += _s240.d; \
488 _q240.d += _r240.d; \
489 _p240.d += _q240.d; \
490 _o240.d += _p240.d; \
491 _n240.d += _o240.d; \
492 _m240.d += _n240.d; \
493 _l240.d += _m240.d; \
494 _k240 += _l240.d; \
495 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
496 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
497 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
498 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
499 _o240.d += _const[7]; \
500 _n240.d += _const[6]; \
501 _m240.d += _const[5]; \
502 _l240.d += _const[4]; \
503 if (_s240.d != 0.0) \
504 _y240 = 1; \
505 if (_r240.d != 0.0) \
506 _y240 = 1; \
507 if (_q240.d != 0.0) \
508 _y240 = 1; \
509 if (_p240.d != 0.0) \
510 _y240 = 1; \
511 _t240 = (DItype) _k240; \
512 _u240 = _l240.i; \
513 _v240 = _m240.i; \
514 _w240 = _n240.i; \
515 _x240 = _o240.i; \
516 R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
517 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
518 R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
519 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
520 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
521 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
522 | _y240); \
523 resetfe; \
525 while (0)
527 /* Division algorithms: */
529 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
530 do \
532 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
533 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
534 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
535 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
536 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
537 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
538 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
539 if (_FP_FRAC_GE_2 (X, Y)) \
541 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
542 _FP_DIV_MEAT_2_udiv_n_f1 \
543 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
544 _FP_DIV_MEAT_2_udiv_n_f0 \
545 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
547 else \
549 R##_e--; \
550 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
551 _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
552 _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
555 /* Normalize, i.e. make the most significant bit of the \
556 denominator set. */ \
557 _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
559 udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
560 _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
561 Y##_f1); \
562 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
563 R##_f1, Y##_f0); \
564 _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
565 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
567 R##_f1--; \
568 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
569 _FP_DIV_MEAT_2_udiv_r); \
570 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
571 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
572 _FP_DIV_MEAT_2_udiv_r)) \
574 R##_f1--; \
575 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
576 _FP_DIV_MEAT_2_udiv_r); \
579 _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
581 if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
583 /* This is a special case, not an optimization \
584 (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
585 As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
586 R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
587 know what kind of bits it is (sticky, guard, round), \
588 we don't care. We also don't care what the reminder is, \
589 because the guard bit will be set anyway. -jj */ \
590 R##_f0 = -1; \
592 else \
594 udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
595 _FP_DIV_MEAT_2_udiv_r_f1, \
596 _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
597 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
598 _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
599 _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
600 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
601 _FP_DIV_MEAT_2_udiv_r)) \
603 R##_f0--; \
604 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
605 _FP_DIV_MEAT_2_udiv_r); \
606 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
607 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
608 _FP_DIV_MEAT_2_udiv_r)) \
610 R##_f0--; \
611 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
612 _FP_DIV_MEAT_2_udiv_r); \
615 if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
616 _FP_DIV_MEAT_2_udiv_m)) \
617 R##_f0 |= _FP_WORK_STICKY; \
620 while (0)
623 /* Square root algorithms:
624 We have just one right now, maybe Newton approximation
625 should be added for those machines where division is fast. */
627 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
628 do \
630 while (q) \
632 T##_f1 = S##_f1 + q; \
633 if (T##_f1 <= X##_f1) \
635 S##_f1 = T##_f1 + q; \
636 X##_f1 -= T##_f1; \
637 R##_f1 += q; \
639 _FP_FRAC_SLL_2 (X, 1); \
640 q >>= 1; \
642 q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
643 while (q != _FP_WORK_ROUND) \
645 T##_f0 = S##_f0 + q; \
646 T##_f1 = S##_f1; \
647 if (T##_f1 < X##_f1 \
648 || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
650 S##_f0 = T##_f0 + q; \
651 S##_f1 += (T##_f0 > S##_f0); \
652 _FP_FRAC_DEC_2 (X, T); \
653 R##_f0 += q; \
655 _FP_FRAC_SLL_2 (X, 1); \
656 q >>= 1; \
658 if (X##_f0 | X##_f1) \
660 if (S##_f1 < X##_f1 \
661 || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
662 R##_f0 |= _FP_WORK_ROUND; \
663 R##_f0 |= _FP_WORK_STICKY; \
666 while (0)
669 /* Assembly/disassembly for converting to/from integral types.
670 No shifting or overflow handled here. */
672 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
673 (void) ((rsize <= _FP_W_TYPE_SIZE) \
674 ? ({ r = X##_f0; }) \
675 : ({ \
676 r = X##_f1; \
677 r <<= _FP_W_TYPE_SIZE; \
678 r += X##_f0; \
681 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
682 do \
684 X##_f0 = r; \
685 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
687 while (0)
689 /* Convert FP values between word sizes. */
691 #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
693 #define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
695 #define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)