ARM: Disable compat mcount code when unneeded.
[glibc.git] / soft-fp / op-4.h
bloba80bdb2f27841487475b1867d4e61ce1cda0ef52
1 /* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
3 Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
34 #define _FP_FRAC_COPY_4(D, S) \
35 (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
36 D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
37 #define _FP_FRAC_SET_4(X, I) __FP_FRAC_SET_4 (X, I)
38 #define _FP_FRAC_HIGH_4(X) (X##_f[3])
39 #define _FP_FRAC_LOW_4(X) (X##_f[0])
40 #define _FP_FRAC_WORD_4(X, w) (X##_f[w])
42 #define _FP_FRAC_SLL_4(X, N) \
43 do \
44 { \
45 _FP_I_TYPE _up, _down, _skip, _i; \
46 _skip = (N) / _FP_W_TYPE_SIZE; \
47 _up = (N) % _FP_W_TYPE_SIZE; \
48 _down = _FP_W_TYPE_SIZE - _up; \
49 if (!_up) \
50 for (_i = 3; _i >= _skip; --_i) \
51 X##_f[_i] = X##_f[_i-_skip]; \
52 else \
53 { \
54 for (_i = 3; _i > _skip; --_i) \
55 X##_f[_i] = (X##_f[_i-_skip] << _up \
56 | X##_f[_i-_skip-1] >> _down); \
57 X##_f[_i--] = X##_f[0] << _up; \
58 } \
59 for (; _i >= 0; --_i) \
60 X##_f[_i] = 0; \
61 } \
62 while (0)
64 /* This one was broken too */
65 #define _FP_FRAC_SRL_4(X, N) \
66 do \
67 { \
68 _FP_I_TYPE _up, _down, _skip, _i; \
69 _skip = (N) / _FP_W_TYPE_SIZE; \
70 _down = (N) % _FP_W_TYPE_SIZE; \
71 _up = _FP_W_TYPE_SIZE - _down; \
72 if (!_down) \
73 for (_i = 0; _i <= 3-_skip; ++_i) \
74 X##_f[_i] = X##_f[_i+_skip]; \
75 else \
76 { \
77 for (_i = 0; _i < 3-_skip; ++_i) \
78 X##_f[_i] = (X##_f[_i+_skip] >> _down \
79 | X##_f[_i+_skip+1] << _up); \
80 X##_f[_i++] = X##_f[3] >> _down; \
81 } \
82 for (; _i < 4; ++_i) \
83 X##_f[_i] = 0; \
84 } \
85 while (0)
88 /* Right shift with sticky-lsb.
89 * What this actually means is that we do a standard right-shift,
90 * but that if any of the bits that fall off the right hand side
91 * were one then we always set the LSbit.
93 #define _FP_FRAC_SRST_4(X, S, N, size) \
94 do \
95 { \
96 _FP_I_TYPE _up, _down, _skip, _i; \
97 _FP_W_TYPE _s; \
98 _skip = (N) / _FP_W_TYPE_SIZE; \
99 _down = (N) % _FP_W_TYPE_SIZE; \
100 _up = _FP_W_TYPE_SIZE - _down; \
101 for (_s = _i = 0; _i < _skip; ++_i) \
102 _s |= X##_f[_i]; \
103 if (!_down) \
104 for (_i = 0; _i <= 3-_skip; ++_i) \
105 X##_f[_i] = X##_f[_i+_skip]; \
106 else \
108 _s |= X##_f[_i] << _up; \
109 for (_i = 0; _i < 3-_skip; ++_i) \
110 X##_f[_i] = (X##_f[_i+_skip] >> _down \
111 | X##_f[_i+_skip+1] << _up); \
112 X##_f[_i++] = X##_f[3] >> _down; \
114 for (; _i < 4; ++_i) \
115 X##_f[_i] = 0; \
116 S = (_s != 0); \
118 while (0)
120 #define _FP_FRAC_SRS_4(X, N, size) \
121 do \
123 int _sticky; \
124 _FP_FRAC_SRST_4 (X, _sticky, N, size); \
125 X##_f[0] |= _sticky; \
127 while (0)
129 #define _FP_FRAC_ADD_4(R, X, Y) \
130 __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
131 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
132 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
134 #define _FP_FRAC_SUB_4(R, X, Y) \
135 __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
136 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
137 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
139 #define _FP_FRAC_DEC_4(X, Y) \
140 __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
141 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
143 #define _FP_FRAC_ADDI_4(X, I) \
144 __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
146 #define _FP_ZEROFRAC_4 0, 0, 0, 0
147 #define _FP_MINFRAC_4 0, 0, 0, 1
148 #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
150 #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
151 #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE) X##_f[3] < 0)
152 #define _FP_FRAC_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
153 #define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
154 (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
155 #define _FP_FRAC_CLEAR_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
157 #define _FP_FRAC_EQ_4(X, Y) \
158 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
159 && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
161 #define _FP_FRAC_GT_4(X, Y) \
162 (X##_f[3] > Y##_f[3] \
163 || (X##_f[3] == Y##_f[3] \
164 && (X##_f[2] > Y##_f[2] \
165 || (X##_f[2] == Y##_f[2] \
166 && (X##_f[1] > Y##_f[1] \
167 || (X##_f[1] == Y##_f[1] \
168 && X##_f[0] > Y##_f[0]))))))
170 #define _FP_FRAC_GE_4(X, Y) \
171 (X##_f[3] > Y##_f[3] \
172 || (X##_f[3] == Y##_f[3] \
173 && (X##_f[2] > Y##_f[2] \
174 || (X##_f[2] == Y##_f[2] \
175 && (X##_f[1] > Y##_f[1] \
176 || (X##_f[1] == Y##_f[1] \
177 && X##_f[0] >= Y##_f[0]))))))
180 #define _FP_FRAC_CLZ_4(R, X) \
181 do \
183 if (X##_f[3]) \
184 __FP_CLZ (R, X##_f[3]); \
185 else if (X##_f[2]) \
187 __FP_CLZ (R, X##_f[2]); \
188 R += _FP_W_TYPE_SIZE; \
190 else if (X##_f[1]) \
192 __FP_CLZ (R, X##_f[1]); \
193 R += _FP_W_TYPE_SIZE*2; \
195 else \
197 __FP_CLZ (R, X##_f[0]); \
198 R += _FP_W_TYPE_SIZE*3; \
201 while (0)
204 #define _FP_UNPACK_RAW_4(fs, X, val) \
205 do \
207 union _FP_UNION_##fs _flo; \
208 _flo.flt = (val); \
209 X##_f[0] = _flo.bits.frac0; \
210 X##_f[1] = _flo.bits.frac1; \
211 X##_f[2] = _flo.bits.frac2; \
212 X##_f[3] = _flo.bits.frac3; \
213 X##_e = _flo.bits.exp; \
214 X##_s = _flo.bits.sign; \
216 while (0)
218 #define _FP_UNPACK_RAW_4_P(fs, X, val) \
219 do \
221 union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
223 X##_f[0] = _flo->bits.frac0; \
224 X##_f[1] = _flo->bits.frac1; \
225 X##_f[2] = _flo->bits.frac2; \
226 X##_f[3] = _flo->bits.frac3; \
227 X##_e = _flo->bits.exp; \
228 X##_s = _flo->bits.sign; \
230 while (0)
232 #define _FP_PACK_RAW_4(fs, val, X) \
233 do \
235 union _FP_UNION_##fs _flo; \
236 _flo.bits.frac0 = X##_f[0]; \
237 _flo.bits.frac1 = X##_f[1]; \
238 _flo.bits.frac2 = X##_f[2]; \
239 _flo.bits.frac3 = X##_f[3]; \
240 _flo.bits.exp = X##_e; \
241 _flo.bits.sign = X##_s; \
242 (val) = _flo.flt; \
244 while (0)
246 #define _FP_PACK_RAW_4_P(fs, val, X) \
247 do \
249 union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \
251 _flo->bits.frac0 = X##_f[0]; \
252 _flo->bits.frac1 = X##_f[1]; \
253 _flo->bits.frac2 = X##_f[2]; \
254 _flo->bits.frac3 = X##_f[3]; \
255 _flo->bits.exp = X##_e; \
256 _flo->bits.sign = X##_s; \
258 while (0)
261 * Multiplication algorithms:
264 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
266 #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
267 do \
269 _FP_FRAC_DECL_2 (_b); \
270 _FP_FRAC_DECL_2 (_c); \
271 _FP_FRAC_DECL_2 (_d); \
272 _FP_FRAC_DECL_2 (_e); \
273 _FP_FRAC_DECL_2 (_f); \
275 doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), X##_f[0], Y##_f[0]); \
276 doit (_b_f1, _b_f0, X##_f[0], Y##_f[1]); \
277 doit (_c_f1, _c_f0, X##_f[1], Y##_f[0]); \
278 doit (_d_f1, _d_f0, X##_f[1], Y##_f[1]); \
279 doit (_e_f1, _e_f0, X##_f[0], Y##_f[2]); \
280 doit (_f_f1, _f_f0, X##_f[2], Y##_f[0]); \
281 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
282 _FP_FRAC_WORD_8 (R, 1), 0, _b_f1, _b_f0, \
283 0, 0, _FP_FRAC_WORD_8 (R, 1)); \
284 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
285 _FP_FRAC_WORD_8 (R, 1), 0, _c_f1, _c_f0, \
286 _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
287 _FP_FRAC_WORD_8 (R, 1)); \
288 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
289 _FP_FRAC_WORD_8 (R, 2), 0, _d_f1, _d_f0, \
290 0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
291 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
292 _FP_FRAC_WORD_8 (R, 2), 0, _e_f1, _e_f0, \
293 _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
294 _FP_FRAC_WORD_8 (R, 2)); \
295 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
296 _FP_FRAC_WORD_8 (R, 2), 0, _f_f1, _f_f0, \
297 _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
298 _FP_FRAC_WORD_8 (R, 2)); \
299 doit (_b_f1, _b_f0, X##_f[0], Y##_f[3]); \
300 doit (_c_f1, _c_f0, X##_f[3], Y##_f[0]); \
301 doit (_d_f1, _d_f0, X##_f[1], Y##_f[2]); \
302 doit (_e_f1, _e_f0, X##_f[2], Y##_f[1]); \
303 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
304 _FP_FRAC_WORD_8 (R, 3), 0, _b_f1, _b_f0, \
305 0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
306 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
307 _FP_FRAC_WORD_8 (R, 3), 0, _c_f1, _c_f0, \
308 _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
309 _FP_FRAC_WORD_8 (R, 3)); \
310 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
311 _FP_FRAC_WORD_8 (R, 3), 0, _d_f1, _d_f0, \
312 _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
313 _FP_FRAC_WORD_8 (R, 3)); \
314 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
315 _FP_FRAC_WORD_8 (R, 3), 0, _e_f1, _e_f0, \
316 _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
317 _FP_FRAC_WORD_8 (R, 3)); \
318 doit (_b_f1, _b_f0, X##_f[2], Y##_f[2]); \
319 doit (_c_f1, _c_f0, X##_f[1], Y##_f[3]); \
320 doit (_d_f1, _d_f0, X##_f[3], Y##_f[1]); \
321 doit (_e_f1, _e_f0, X##_f[2], Y##_f[3]); \
322 doit (_f_f1, _f_f0, X##_f[3], Y##_f[2]); \
323 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
324 _FP_FRAC_WORD_8 (R, 4), 0, _b_f1, _b_f0, \
325 0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
326 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
327 _FP_FRAC_WORD_8 (R, 4), 0, _c_f1, _c_f0, \
328 _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
329 _FP_FRAC_WORD_8 (R, 4)); \
330 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
331 _FP_FRAC_WORD_8 (R, 4), 0, _d_f1, _d_f0, \
332 _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
333 _FP_FRAC_WORD_8 (R, 4)); \
334 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
335 _FP_FRAC_WORD_8 (R, 5), 0, _e_f1, _e_f0, \
336 0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
337 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
338 _FP_FRAC_WORD_8 (R, 5), 0, _f_f1, _f_f0, \
339 _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
340 _FP_FRAC_WORD_8 (R, 5)); \
341 doit (_b_f1, _b_f0, X##_f[3], Y##_f[3]); \
342 __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
343 _b_f1, _b_f0, \
344 _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
346 while (0)
348 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
349 do \
351 _FP_FRAC_DECL_8 (_z); \
353 _FP_MUL_MEAT_DW_4_wide (wfracbits, _z, X, Y, doit); \
355 /* Normalize since we know where the msb of the multiplicands \
356 were (bit B), we know that the msb of the of the product is \
357 at either 2B or 2B-1. */ \
358 _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \
359 __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \
360 _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \
362 while (0)
364 #define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
365 do \
367 mpn_mul_n (R##_f, _x_f, _y_f, 4); \
369 while (0)
371 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
372 do \
374 _FP_FRAC_DECL_8 (_z); \
376 _FP_MUL_MEAT_DW_4_gmp (wfracbits, _z, X, Y); \
378 /* Normalize since we know where the msb of the multiplicands \
379 were (bit B), we know that the msb of the of the product is \
380 at either 2B or 2B-1. */ \
381 _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \
382 __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \
383 _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \
385 while (0)
388 * Helper utility for _FP_DIV_MEAT_4_udiv:
389 * pppp = m * nnn
391 #define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
392 do \
394 UWtype _t; \
395 umul_ppmm (p1, p0, m, n0); \
396 umul_ppmm (p2, _t, m, n1); \
397 __FP_FRAC_ADDI_2 (p2, p1, _t); \
398 umul_ppmm (p3, _t, m, n2); \
399 __FP_FRAC_ADDI_2 (p3, p2, _t); \
401 while (0)
404 * Division algorithms:
407 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
408 do \
410 int _i; \
411 _FP_FRAC_DECL_4 (_n); \
412 _FP_FRAC_DECL_4 (_m); \
413 _FP_FRAC_SET_4 (_n, _FP_ZEROFRAC_4); \
414 if (_FP_FRAC_GE_4 (X, Y)) \
416 _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
417 _FP_FRAC_SRL_4 (X, 1); \
419 else \
420 R##_e--; \
422 /* Normalize, i.e. make the most significant bit of the \
423 denominator set. */ \
424 _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
426 for (_i = 3; ; _i--) \
428 if (X##_f[3] == Y##_f[3]) \
430 /* This is a special case, not an optimization \
431 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
432 As X## is guaranteed to be < Y, R##_f[_i] can be either \
433 (UWtype)-1 or (UWtype)-2. */ \
434 R##_f[_i] = -1; \
435 if (!_i) \
436 break; \
437 __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
438 Y##_f[2], Y##_f[1], Y##_f[0], 0, \
439 X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
440 _FP_FRAC_SUB_4 (X, Y, X); \
441 if (X##_f[3] > Y##_f[3]) \
443 R##_f[_i] = -2; \
444 _FP_FRAC_ADD_4 (X, Y, X); \
447 else \
449 udiv_qrnnd (R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
450 umul_ppppmnnn (_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
451 R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
452 X##_f[2] = X##_f[1]; \
453 X##_f[1] = X##_f[0]; \
454 X##_f[0] = _n_f[_i]; \
455 if (_FP_FRAC_GT_4 (_m, X)) \
457 R##_f[_i]--; \
458 _FP_FRAC_ADD_4 (X, Y, X); \
459 if (_FP_FRAC_GE_4 (X, Y) && _FP_FRAC_GT_4 (_m, X)) \
461 R##_f[_i]--; \
462 _FP_FRAC_ADD_4 (X, Y, X); \
465 _FP_FRAC_DEC_4 (X, _m); \
466 if (!_i) \
468 if (!_FP_FRAC_EQ_4 (X, _m)) \
469 R##_f[0] |= _FP_WORK_STICKY; \
470 break; \
475 while (0)
479 * Square root algorithms:
480 * We have just one right now, maybe Newton approximation
481 * should be added for those machines where division is fast.
484 #define _FP_SQRT_MEAT_4(R, S, T, X, q) \
485 do \
487 while (q) \
489 T##_f[3] = S##_f[3] + q; \
490 if (T##_f[3] <= X##_f[3]) \
492 S##_f[3] = T##_f[3] + q; \
493 X##_f[3] -= T##_f[3]; \
494 R##_f[3] += q; \
496 _FP_FRAC_SLL_4 (X, 1); \
497 q >>= 1; \
499 q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
500 while (q) \
502 T##_f[2] = S##_f[2] + q; \
503 T##_f[3] = S##_f[3]; \
504 if (T##_f[3] < X##_f[3] \
505 || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
507 S##_f[2] = T##_f[2] + q; \
508 S##_f[3] += (T##_f[2] > S##_f[2]); \
509 __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
510 T##_f[3], T##_f[2]); \
511 R##_f[2] += q; \
513 _FP_FRAC_SLL_4 (X, 1); \
514 q >>= 1; \
516 q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
517 while (q) \
519 T##_f[1] = S##_f[1] + q; \
520 T##_f[2] = S##_f[2]; \
521 T##_f[3] = S##_f[3]; \
522 if (T##_f[3] < X##_f[3] \
523 || (T##_f[3] == X##_f[3] \
524 && (T##_f[2] < X##_f[2] \
525 || (T##_f[2] == X##_f[2] \
526 && T##_f[1] <= X##_f[1])))) \
528 S##_f[1] = T##_f[1] + q; \
529 S##_f[2] += (T##_f[1] > S##_f[1]); \
530 S##_f[3] += (T##_f[2] > S##_f[2]); \
531 __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
532 T##_f[3], T##_f[2], T##_f[1]); \
533 R##_f[1] += q; \
535 _FP_FRAC_SLL_4 (X, 1); \
536 q >>= 1; \
538 q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
539 while (q != _FP_WORK_ROUND) \
541 T##_f[0] = S##_f[0] + q; \
542 T##_f[1] = S##_f[1]; \
543 T##_f[2] = S##_f[2]; \
544 T##_f[3] = S##_f[3]; \
545 if (_FP_FRAC_GE_4 (X, T)) \
547 S##_f[0] = T##_f[0] + q; \
548 S##_f[1] += (T##_f[0] > S##_f[0]); \
549 S##_f[2] += (T##_f[1] > S##_f[1]); \
550 S##_f[3] += (T##_f[2] > S##_f[2]); \
551 _FP_FRAC_DEC_4 (X, T); \
552 R##_f[0] += q; \
554 _FP_FRAC_SLL_4 (X, 1); \
555 q >>= 1; \
557 if (!_FP_FRAC_ZEROP_4 (X)) \
559 if (_FP_FRAC_GT_4 (X, S)) \
560 R##_f[0] |= _FP_WORK_ROUND; \
561 R##_f[0] |= _FP_WORK_STICKY; \
564 while (0)
568 * Internals
571 #define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
572 (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
574 #ifndef __FP_FRAC_ADD_3
575 # define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
576 do \
578 _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
579 r0 = x0 + y0; \
580 __FP_FRAC_ADD_3_c1 = r0 < x0; \
581 r1 = x1 + y1; \
582 __FP_FRAC_ADD_3_c2 = r1 < x1; \
583 r1 += __FP_FRAC_ADD_3_c1; \
584 __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
585 r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
587 while (0)
588 #endif
590 #ifndef __FP_FRAC_ADD_4
591 # define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
592 do \
594 _FP_W_TYPE _c1, _c2, _c3; \
595 r0 = x0 + y0; \
596 _c1 = r0 < x0; \
597 r1 = x1 + y1; \
598 _c2 = r1 < x1; \
599 r1 += _c1; \
600 _c2 |= r1 < _c1; \
601 r2 = x2 + y2; \
602 _c3 = r2 < x2; \
603 r2 += _c2; \
604 _c3 |= r2 < _c2; \
605 r3 = x3 + y3 + _c3; \
607 while (0)
608 #endif
610 #ifndef __FP_FRAC_SUB_3
611 # define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
612 do \
614 _FP_W_TYPE _c1, _c2; \
615 r0 = x0 - y0; \
616 _c1 = r0 > x0; \
617 r1 = x1 - y1; \
618 _c2 = r1 > x1; \
619 r1 -= _c1; \
620 _c2 |= _c1 && (y1 == x1); \
621 r2 = x2 - y2 - _c2; \
623 while (0)
624 #endif
626 #ifndef __FP_FRAC_SUB_4
627 # define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
628 do \
630 _FP_W_TYPE _c1, _c2, _c3; \
631 r0 = x0 - y0; \
632 _c1 = r0 > x0; \
633 r1 = x1 - y1; \
634 _c2 = r1 > x1; \
635 r1 -= _c1; \
636 _c2 |= _c1 && (y1 == x1); \
637 r2 = x2 - y2; \
638 _c3 = r2 > x2; \
639 r2 -= _c2; \
640 _c3 |= _c2 && (y2 == x2); \
641 r3 = x3 - y3 - _c3; \
643 while (0)
644 #endif
646 #ifndef __FP_FRAC_DEC_3
647 # define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
648 do \
650 UWtype _t0, _t1, _t2; \
651 _t0 = x0, _t1 = x1, _t2 = x2; \
652 __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \
654 while (0)
655 #endif
657 #ifndef __FP_FRAC_DEC_4
658 # define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
659 do \
661 UWtype _t0, _t1, _t2, _t3; \
662 _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \
663 __FP_FRAC_SUB_4 (x3, x2, x1, x0, _t3, _t2, _t1, _t0, y3, y2, y1, y0); \
665 while (0)
666 #endif
668 #ifndef __FP_FRAC_ADDI_4
669 # define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
670 do \
672 UWtype _t; \
673 _t = ((x0 += i) < i); \
674 x1 += _t; \
675 _t = (x1 < _t); \
676 x2 += _t; \
677 _t = (x2 < _t); \
678 x3 += _t; \
680 while (0)
681 #endif
683 /* Convert FP values between word sizes. This appears to be more
684 * complicated than I'd have expected it to be, so these might be
685 * wrong... These macros are in any case somewhat bogus because they
686 * use information about what various FRAC_n variables look like
687 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
688 * the ones in op-2.h and op-1.h.
690 #define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
692 #define _FP_FRAC_COPY_2_4(D, S) \
693 do \
695 D##_f0 = S##_f[0]; \
696 D##_f1 = S##_f[1]; \
698 while (0)
700 /* Assembly/disassembly for converting to/from integral types.
701 * No shifting or overflow handled here.
703 /* Put the FP value X into r, which is an integer of size rsize. */
704 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
705 do \
707 if (rsize <= _FP_W_TYPE_SIZE) \
708 r = X##_f[0]; \
709 else if (rsize <= 2*_FP_W_TYPE_SIZE) \
711 r = X##_f[1]; \
712 r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
713 r += X##_f[0]; \
715 else \
717 /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
718 /* and int == 4words as a single case. */ \
719 r = X##_f[3]; \
720 r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
721 r += X##_f[2]; \
722 r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
723 r += X##_f[1]; \
724 r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
725 r += X##_f[0]; \
728 while (0)
730 /* "No disassemble Number Five!" */
731 /* move an integer of size rsize into X's fractional part. We rely on
732 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
733 * having to mask the values we store into it.
735 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
736 do \
738 X##_f[0] = r; \
739 X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
740 X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
741 X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
743 while (0)
745 #define _FP_FRAC_COPY_4_1(D, S) \
746 do \
748 D##_f[0] = S##_f; \
749 D##_f[1] = D##_f[2] = D##_f[3] = 0; \
751 while (0)
753 #define _FP_FRAC_COPY_4_2(D, S) \
754 do \
756 D##_f[0] = S##_f0; \
757 D##_f[1] = S##_f1; \
758 D##_f[2] = D##_f[3] = 0; \
760 while (0)
762 #define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)