Add more directives to benchmark input files
[glibc.git] / soft-fp / op-4.h
blobf16870d0f734b00eee0cf04e2df18015b63849b7
1 /* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
3 Copyright (C) 1997-2013 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
34 #define _FP_FRAC_COPY_4(D,S) \
35 (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
36 D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
37 #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
38 #define _FP_FRAC_HIGH_4(X) (X##_f[3])
39 #define _FP_FRAC_LOW_4(X) (X##_f[0])
40 #define _FP_FRAC_WORD_4(X,w) (X##_f[w])
42 #define _FP_FRAC_SLL_4(X,N) \
43 do { \
44 _FP_I_TYPE _up, _down, _skip, _i; \
45 _skip = (N) / _FP_W_TYPE_SIZE; \
46 _up = (N) % _FP_W_TYPE_SIZE; \
47 _down = _FP_W_TYPE_SIZE - _up; \
48 if (!_up) \
49 for (_i = 3; _i >= _skip; --_i) \
50 X##_f[_i] = X##_f[_i-_skip]; \
51 else \
52 { \
53 for (_i = 3; _i > _skip; --_i) \
54 X##_f[_i] = X##_f[_i-_skip] << _up \
55 | X##_f[_i-_skip-1] >> _down; \
56 X##_f[_i--] = X##_f[0] << _up; \
57 } \
58 for (; _i >= 0; --_i) \
59 X##_f[_i] = 0; \
60 } while (0)
62 /* This one was broken too */
63 #define _FP_FRAC_SRL_4(X,N) \
64 do { \
65 _FP_I_TYPE _up, _down, _skip, _i; \
66 _skip = (N) / _FP_W_TYPE_SIZE; \
67 _down = (N) % _FP_W_TYPE_SIZE; \
68 _up = _FP_W_TYPE_SIZE - _down; \
69 if (!_down) \
70 for (_i = 0; _i <= 3-_skip; ++_i) \
71 X##_f[_i] = X##_f[_i+_skip]; \
72 else \
73 { \
74 for (_i = 0; _i < 3-_skip; ++_i) \
75 X##_f[_i] = X##_f[_i+_skip] >> _down \
76 | X##_f[_i+_skip+1] << _up; \
77 X##_f[_i++] = X##_f[3] >> _down; \
78 } \
79 for (; _i < 4; ++_i) \
80 X##_f[_i] = 0; \
81 } while (0)
84 /* Right shift with sticky-lsb.
85 * What this actually means is that we do a standard right-shift,
86 * but that if any of the bits that fall off the right hand side
87 * were one then we always set the LSbit.
89 #define _FP_FRAC_SRST_4(X,S,N,size) \
90 do { \
91 _FP_I_TYPE _up, _down, _skip, _i; \
92 _FP_W_TYPE _s; \
93 _skip = (N) / _FP_W_TYPE_SIZE; \
94 _down = (N) % _FP_W_TYPE_SIZE; \
95 _up = _FP_W_TYPE_SIZE - _down; \
96 for (_s = _i = 0; _i < _skip; ++_i) \
97 _s |= X##_f[_i]; \
98 if (!_down) \
99 for (_i = 0; _i <= 3-_skip; ++_i) \
100 X##_f[_i] = X##_f[_i+_skip]; \
101 else \
103 _s |= X##_f[_i] << _up; \
104 for (_i = 0; _i < 3-_skip; ++_i) \
105 X##_f[_i] = X##_f[_i+_skip] >> _down \
106 | X##_f[_i+_skip+1] << _up; \
107 X##_f[_i++] = X##_f[3] >> _down; \
109 for (; _i < 4; ++_i) \
110 X##_f[_i] = 0; \
111 S = (_s != 0); \
112 } while (0)
114 #define _FP_FRAC_SRS_4(X,N,size) \
115 do { \
116 int _sticky; \
117 _FP_FRAC_SRST_4(X, _sticky, N, size); \
118 X##_f[0] |= _sticky; \
119 } while (0)
121 #define _FP_FRAC_ADD_4(R,X,Y) \
122 __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
123 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
124 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
126 #define _FP_FRAC_SUB_4(R,X,Y) \
127 __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
128 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
129 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
131 #define _FP_FRAC_DEC_4(X,Y) \
132 __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
133 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
135 #define _FP_FRAC_ADDI_4(X,I) \
136 __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
138 #define _FP_ZEROFRAC_4 0,0,0,0
139 #define _FP_MINFRAC_4 0,0,0,1
140 #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
142 #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
143 #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
144 #define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
145 #define _FP_FRAC_HIGHBIT_DW_4(fs,X) \
146 (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
147 #define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
149 #define _FP_FRAC_EQ_4(X,Y) \
150 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
151 && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
153 #define _FP_FRAC_GT_4(X,Y) \
154 (X##_f[3] > Y##_f[3] || \
155 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
156 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
157 (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
158 )) \
159 )) \
162 #define _FP_FRAC_GE_4(X,Y) \
163 (X##_f[3] > Y##_f[3] || \
164 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
165 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
166 (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
167 )) \
168 )) \
172 #define _FP_FRAC_CLZ_4(R,X) \
173 do { \
174 if (X##_f[3]) \
176 __FP_CLZ(R,X##_f[3]); \
178 else if (X##_f[2]) \
180 __FP_CLZ(R,X##_f[2]); \
181 R += _FP_W_TYPE_SIZE; \
183 else if (X##_f[1]) \
185 __FP_CLZ(R,X##_f[1]); \
186 R += _FP_W_TYPE_SIZE*2; \
188 else \
190 __FP_CLZ(R,X##_f[0]); \
191 R += _FP_W_TYPE_SIZE*3; \
193 } while(0)
196 #define _FP_UNPACK_RAW_4(fs, X, val) \
197 do { \
198 union _FP_UNION_##fs _flo; _flo.flt = (val); \
199 X##_f[0] = _flo.bits.frac0; \
200 X##_f[1] = _flo.bits.frac1; \
201 X##_f[2] = _flo.bits.frac2; \
202 X##_f[3] = _flo.bits.frac3; \
203 X##_e = _flo.bits.exp; \
204 X##_s = _flo.bits.sign; \
205 } while (0)
207 #define _FP_UNPACK_RAW_4_P(fs, X, val) \
208 do { \
209 union _FP_UNION_##fs *_flo = \
210 (union _FP_UNION_##fs *)(val); \
212 X##_f[0] = _flo->bits.frac0; \
213 X##_f[1] = _flo->bits.frac1; \
214 X##_f[2] = _flo->bits.frac2; \
215 X##_f[3] = _flo->bits.frac3; \
216 X##_e = _flo->bits.exp; \
217 X##_s = _flo->bits.sign; \
218 } while (0)
220 #define _FP_PACK_RAW_4(fs, val, X) \
221 do { \
222 union _FP_UNION_##fs _flo; \
223 _flo.bits.frac0 = X##_f[0]; \
224 _flo.bits.frac1 = X##_f[1]; \
225 _flo.bits.frac2 = X##_f[2]; \
226 _flo.bits.frac3 = X##_f[3]; \
227 _flo.bits.exp = X##_e; \
228 _flo.bits.sign = X##_s; \
229 (val) = _flo.flt; \
230 } while (0)
232 #define _FP_PACK_RAW_4_P(fs, val, X) \
233 do { \
234 union _FP_UNION_##fs *_flo = \
235 (union _FP_UNION_##fs *)(val); \
237 _flo->bits.frac0 = X##_f[0]; \
238 _flo->bits.frac1 = X##_f[1]; \
239 _flo->bits.frac2 = X##_f[2]; \
240 _flo->bits.frac3 = X##_f[3]; \
241 _flo->bits.exp = X##_e; \
242 _flo->bits.sign = X##_s; \
243 } while (0)
246 * Multiplication algorithms:
249 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
251 #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
252 do { \
253 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
254 _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \
256 doit(_FP_FRAC_WORD_8(R,1), _FP_FRAC_WORD_8(R,0), X##_f[0], Y##_f[0]); \
257 doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \
258 doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \
259 doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \
260 doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \
261 doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \
262 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
263 _FP_FRAC_WORD_8(R,1), 0,_b_f1,_b_f0, \
264 0,0,_FP_FRAC_WORD_8(R,1)); \
265 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
266 _FP_FRAC_WORD_8(R,1), 0,_c_f1,_c_f0, \
267 _FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
268 _FP_FRAC_WORD_8(R,1)); \
269 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
270 _FP_FRAC_WORD_8(R,2), 0,_d_f1,_d_f0, \
271 0,_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2)); \
272 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
273 _FP_FRAC_WORD_8(R,2), 0,_e_f1,_e_f0, \
274 _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
275 _FP_FRAC_WORD_8(R,2)); \
276 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
277 _FP_FRAC_WORD_8(R,2), 0,_f_f1,_f_f0, \
278 _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
279 _FP_FRAC_WORD_8(R,2)); \
280 doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \
281 doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \
282 doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \
283 doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \
284 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
285 _FP_FRAC_WORD_8(R,3), 0,_b_f1,_b_f0, \
286 0,_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3)); \
287 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
288 _FP_FRAC_WORD_8(R,3), 0,_c_f1,_c_f0, \
289 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
290 _FP_FRAC_WORD_8(R,3)); \
291 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
292 _FP_FRAC_WORD_8(R,3), 0,_d_f1,_d_f0, \
293 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
294 _FP_FRAC_WORD_8(R,3)); \
295 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
296 _FP_FRAC_WORD_8(R,3), 0,_e_f1,_e_f0, \
297 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
298 _FP_FRAC_WORD_8(R,3)); \
299 doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \
300 doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \
301 doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \
302 doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \
303 doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \
304 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
305 _FP_FRAC_WORD_8(R,4), 0,_b_f1,_b_f0, \
306 0,_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4)); \
307 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
308 _FP_FRAC_WORD_8(R,4), 0,_c_f1,_c_f0, \
309 _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
310 _FP_FRAC_WORD_8(R,4)); \
311 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
312 _FP_FRAC_WORD_8(R,4), 0,_d_f1,_d_f0, \
313 _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
314 _FP_FRAC_WORD_8(R,4)); \
315 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
316 _FP_FRAC_WORD_8(R,5), 0,_e_f1,_e_f0, \
317 0,_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5)); \
318 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
319 _FP_FRAC_WORD_8(R,5), 0,_f_f1,_f_f0, \
320 _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
321 _FP_FRAC_WORD_8(R,5)); \
322 doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \
323 __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
324 _b_f1,_b_f0, \
325 _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6)); \
326 } while (0)
328 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
329 do { \
330 _FP_FRAC_DECL_8(_z); \
332 _FP_MUL_MEAT_DW_4_wide(wfracbits, _z, X, Y, doit); \
334 /* Normalize since we know where the msb of the multiplicands \
335 were (bit B), we know that the msb of the of the product is \
336 at either 2B or 2B-1. */ \
337 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
338 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
339 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
340 } while (0)
342 #define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
343 do { \
344 mpn_mul_n(R##_f, _x_f, _y_f, 4); \
345 } while (0)
347 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
348 do { \
349 _FP_FRAC_DECL_8(_z); \
351 _FP_MUL_MEAT_DW_4_gmp(wfracbits, _z, X, Y); \
353 /* Normalize since we know where the msb of the multiplicands \
354 were (bit B), we know that the msb of the of the product is \
355 at either 2B or 2B-1. */ \
356 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
357 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
358 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
359 } while (0)
362 * Helper utility for _FP_DIV_MEAT_4_udiv:
363 * pppp = m * nnn
365 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \
366 do { \
367 UWtype _t; \
368 umul_ppmm(p1,p0,m,n0); \
369 umul_ppmm(p2,_t,m,n1); \
370 __FP_FRAC_ADDI_2(p2,p1,_t); \
371 umul_ppmm(p3,_t,m,n2); \
372 __FP_FRAC_ADDI_2(p3,p2,_t); \
373 } while (0)
376 * Division algorithms:
379 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
380 do { \
381 int _i; \
382 _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \
383 _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \
384 if (_FP_FRAC_GT_4(X, Y)) \
386 _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
387 _FP_FRAC_SRL_4(X, 1); \
389 else \
390 R##_e--; \
392 /* Normalize, i.e. make the most significant bit of the \
393 denominator set. */ \
394 _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \
396 for (_i = 3; ; _i--) \
398 if (X##_f[3] == Y##_f[3]) \
400 /* This is a special case, not an optimization \
401 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
402 As X## is guaranteed to be < Y, R##_f[_i] can be either \
403 (UWtype)-1 or (UWtype)-2. */ \
404 R##_f[_i] = -1; \
405 if (!_i) \
406 break; \
407 __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
408 Y##_f[2], Y##_f[1], Y##_f[0], 0, \
409 X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
410 _FP_FRAC_SUB_4(X, Y, X); \
411 if (X##_f[3] > Y##_f[3]) \
413 R##_f[_i] = -2; \
414 _FP_FRAC_ADD_4(X, Y, X); \
417 else \
419 udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
420 umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
421 R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
422 X##_f[2] = X##_f[1]; \
423 X##_f[1] = X##_f[0]; \
424 X##_f[0] = _n_f[_i]; \
425 if (_FP_FRAC_GT_4(_m, X)) \
427 R##_f[_i]--; \
428 _FP_FRAC_ADD_4(X, Y, X); \
429 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \
431 R##_f[_i]--; \
432 _FP_FRAC_ADD_4(X, Y, X); \
435 _FP_FRAC_DEC_4(X, _m); \
436 if (!_i) \
438 if (!_FP_FRAC_EQ_4(X, _m)) \
439 R##_f[0] |= _FP_WORK_STICKY; \
440 break; \
444 } while (0)
448 * Square root algorithms:
449 * We have just one right now, maybe Newton approximation
450 * should be added for those machines where division is fast.
453 #define _FP_SQRT_MEAT_4(R, S, T, X, q) \
454 do { \
455 while (q) \
457 T##_f[3] = S##_f[3] + q; \
458 if (T##_f[3] <= X##_f[3]) \
460 S##_f[3] = T##_f[3] + q; \
461 X##_f[3] -= T##_f[3]; \
462 R##_f[3] += q; \
464 _FP_FRAC_SLL_4(X, 1); \
465 q >>= 1; \
467 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
468 while (q) \
470 T##_f[2] = S##_f[2] + q; \
471 T##_f[3] = S##_f[3]; \
472 if (T##_f[3] < X##_f[3] || \
473 (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
475 S##_f[2] = T##_f[2] + q; \
476 S##_f[3] += (T##_f[2] > S##_f[2]); \
477 __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \
478 T##_f[3], T##_f[2]); \
479 R##_f[2] += q; \
481 _FP_FRAC_SLL_4(X, 1); \
482 q >>= 1; \
484 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
485 while (q) \
487 T##_f[1] = S##_f[1] + q; \
488 T##_f[2] = S##_f[2]; \
489 T##_f[3] = S##_f[3]; \
490 if (T##_f[3] < X##_f[3] || \
491 (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \
492 (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \
494 S##_f[1] = T##_f[1] + q; \
495 S##_f[2] += (T##_f[1] > S##_f[1]); \
496 S##_f[3] += (T##_f[2] > S##_f[2]); \
497 __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \
498 T##_f[3], T##_f[2], T##_f[1]); \
499 R##_f[1] += q; \
501 _FP_FRAC_SLL_4(X, 1); \
502 q >>= 1; \
504 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
505 while (q != _FP_WORK_ROUND) \
507 T##_f[0] = S##_f[0] + q; \
508 T##_f[1] = S##_f[1]; \
509 T##_f[2] = S##_f[2]; \
510 T##_f[3] = S##_f[3]; \
511 if (_FP_FRAC_GE_4(X,T)) \
513 S##_f[0] = T##_f[0] + q; \
514 S##_f[1] += (T##_f[0] > S##_f[0]); \
515 S##_f[2] += (T##_f[1] > S##_f[1]); \
516 S##_f[3] += (T##_f[2] > S##_f[2]); \
517 _FP_FRAC_DEC_4(X, T); \
518 R##_f[0] += q; \
520 _FP_FRAC_SLL_4(X, 1); \
521 q >>= 1; \
523 if (!_FP_FRAC_ZEROP_4(X)) \
525 if (_FP_FRAC_GT_4(X,S)) \
526 R##_f[0] |= _FP_WORK_ROUND; \
527 R##_f[0] |= _FP_WORK_STICKY; \
529 } while (0)
533 * Internals
536 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
537 (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
539 #ifndef __FP_FRAC_ADD_3
540 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
541 do { \
542 _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
543 r0 = x0 + y0; \
544 __FP_FRAC_ADD_3_c1 = r0 < x0; \
545 r1 = x1 + y1; \
546 __FP_FRAC_ADD_3_c2 = r1 < x1; \
547 r1 += __FP_FRAC_ADD_3_c1; \
548 __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
549 r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
550 } while (0)
551 #endif
553 #ifndef __FP_FRAC_ADD_4
554 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
555 do { \
556 _FP_W_TYPE _c1, _c2, _c3; \
557 r0 = x0 + y0; \
558 _c1 = r0 < x0; \
559 r1 = x1 + y1; \
560 _c2 = r1 < x1; \
561 r1 += _c1; \
562 _c2 |= r1 < _c1; \
563 r2 = x2 + y2; \
564 _c3 = r2 < x2; \
565 r2 += _c2; \
566 _c3 |= r2 < _c2; \
567 r3 = x3 + y3 + _c3; \
568 } while (0)
569 #endif
571 #ifndef __FP_FRAC_SUB_3
572 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
573 do { \
574 _FP_W_TYPE _c1, _c2; \
575 r0 = x0 - y0; \
576 _c1 = r0 > x0; \
577 r1 = x1 - y1; \
578 _c2 = r1 > x1; \
579 r1 -= _c1; \
580 _c2 |= _c1 && (y1 == x1); \
581 r2 = x2 - y2 - _c2; \
582 } while (0)
583 #endif
585 #ifndef __FP_FRAC_SUB_4
586 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
587 do { \
588 _FP_W_TYPE _c1, _c2, _c3; \
589 r0 = x0 - y0; \
590 _c1 = r0 > x0; \
591 r1 = x1 - y1; \
592 _c2 = r1 > x1; \
593 r1 -= _c1; \
594 _c2 |= _c1 && (y1 == x1); \
595 r2 = x2 - y2; \
596 _c3 = r2 > x2; \
597 r2 -= _c2; \
598 _c3 |= _c2 && (y2 == x2); \
599 r3 = x3 - y3 - _c3; \
600 } while (0)
601 #endif
603 #ifndef __FP_FRAC_DEC_3
604 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \
605 do { \
606 UWtype _t0, _t1, _t2; \
607 _t0 = x0, _t1 = x1, _t2 = x2; \
608 __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \
609 } while (0)
610 #endif
612 #ifndef __FP_FRAC_DEC_4
613 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \
614 do { \
615 UWtype _t0, _t1, _t2, _t3; \
616 _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \
617 __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \
618 } while (0)
619 #endif
621 #ifndef __FP_FRAC_ADDI_4
622 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
623 do { \
624 UWtype _t; \
625 _t = ((x0 += i) < i); \
626 x1 += _t; _t = (x1 < _t); \
627 x2 += _t; _t = (x2 < _t); \
628 x3 += _t; \
629 } while (0)
630 #endif
632 /* Convert FP values between word sizes. This appears to be more
633 * complicated than I'd have expected it to be, so these might be
634 * wrong... These macros are in any case somewhat bogus because they
635 * use information about what various FRAC_n variables look like
636 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
637 * the ones in op-2.h and op-1.h.
639 #define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
641 #define _FP_FRAC_COPY_2_4(D, S) \
642 do { \
643 D##_f0 = S##_f[0]; \
644 D##_f1 = S##_f[1]; \
645 } while (0)
647 /* Assembly/disassembly for converting to/from integral types.
648 * No shifting or overflow handled here.
650 /* Put the FP value X into r, which is an integer of size rsize. */
651 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
652 do { \
653 if (rsize <= _FP_W_TYPE_SIZE) \
654 r = X##_f[0]; \
655 else if (rsize <= 2*_FP_W_TYPE_SIZE) \
657 r = X##_f[1]; \
658 r <<= _FP_W_TYPE_SIZE; \
659 r += X##_f[0]; \
661 else \
663 /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
664 /* and int == 4words as a single case. */ \
665 r = X##_f[3]; \
666 r <<= _FP_W_TYPE_SIZE; \
667 r += X##_f[2]; \
668 r <<= _FP_W_TYPE_SIZE; \
669 r += X##_f[1]; \
670 r <<= _FP_W_TYPE_SIZE; \
671 r += X##_f[0]; \
673 } while (0)
675 /* "No disassemble Number Five!" */
676 /* move an integer of size rsize into X's fractional part. We rely on
677 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
678 * having to mask the values we store into it.
680 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
681 do { \
682 X##_f[0] = r; \
683 X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
684 X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
685 X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
686 } while (0);
688 #define _FP_FRAC_COPY_4_1(D, S) \
689 do { \
690 D##_f[0] = S##_f; \
691 D##_f[1] = D##_f[2] = D##_f[3] = 0; \
692 } while (0)
694 #define _FP_FRAC_COPY_4_2(D, S) \
695 do { \
696 D##_f[0] = S##_f0; \
697 D##_f[1] = S##_f1; \
698 D##_f[2] = D##_f[3] = 0; \
699 } while (0)
701 #define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S)