1 /* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
3 Copyright (C) 1997-2017 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 #ifndef SOFT_FP_OP_4_H
34 #define SOFT_FP_OP_4_H 1
/* Declare the storage of a four-word fraction called X: an array
   X_f of four _FP_W_TYPE words.  Word 0 is the least significant
   word and word 3 the most significant one.  */
#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]

/* Copy every word of the four-word fraction S into D.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], \
   D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], \
   D##_f[3] = S##_f[3])
/* Load X from I, which must expand to four comma-separated word
   values, most significant word first (e.g. _FP_ZEROFRAC_4).  */
#define _FP_FRAC_SET_4(X, I) __FP_FRAC_SET_4 (X, I)

/* Most significant word of X.  */
#define _FP_FRAC_HIGH_4(X) (X##_f[3])

/* Least significant word of X.  */
#define _FP_FRAC_LOW_4(X) (X##_f[0])

/* Word number W of X, counting from the least significant word.  */
#define _FP_FRAC_WORD_4(X, w) (X##_f[w])
/* Shift the four-word fraction X left by N bits, in place.

   N is split into a whole-word part (skip) and a sub-word part (up).
   When the sub-word part is zero the words are simply moved, so that
   a shift by the full word width is never evaluated.  Otherwise each
   result word combines two neighbouring source words.  The low-order
   words vacated by the shift are cleared at the end.

   NOTE(review): N >= 4 * _FP_W_TYPE_SIZE is not handled here; callers
   are presumably expected never to request it.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      if (!_FP_FRAC_SLL_4_up) \
        for (_FP_FRAC_SLL_4_i = 3; \
             _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
             --_FP_FRAC_SLL_4_i) \
          X##_f[_FP_FRAC_SLL_4_i] \
            = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_4_i = 3; \
               _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
               --_FP_FRAC_SLL_4_i) \
            X##_f[_FP_FRAC_SLL_4_i] \
              = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
                  << _FP_FRAC_SLL_4_up) \
                 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
                    >> _FP_FRAC_SLL_4_down)); \
          /* The lowest surviving word has no lower neighbour.  */ \
          X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
        } \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
        X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)
/* This one was broken too.  */
/* Shift the four-word fraction X right by N bits, in place, filling
   with zeros from the top.  Bits shifted out at the bottom are lost.

   N is split into a whole-word part (skip) and a sub-word part
   (down).  The word-aligned case is a plain move, so that a shift by
   the full word width is never evaluated.

   NOTE(review): N >= 4 * _FP_W_TYPE_SIZE is not handled here; callers
   are presumably expected never to request it.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      if (!_FP_FRAC_SRL_4_down) \
        for (_FP_FRAC_SRL_4_i = 0; \
             _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
             ++_FP_FRAC_SRL_4_i) \
          X##_f[_FP_FRAC_SRL_4_i] \
            = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_4_i = 0; \
               _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
               ++_FP_FRAC_SRL_4_i) \
            X##_f[_FP_FRAC_SRL_4_i] \
              = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
                  >> _FP_FRAC_SRL_4_down) \
                 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
                    << _FP_FRAC_SRL_4_up)); \
          /* The highest surviving word has no upper neighbour.  */ \
          X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
        } \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
        X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)
/* Right shift with sticky-lsb.
   What this actually means is that we do a standard right-shift,
   but that if any of the bits that fall off the right hand side
   were one then we always set the LSbit.

   This variant does not modify the LSbit itself: X is shifted right
   by N bits and S is set to 1 if any discarded bit was nonzero, to 0
   otherwise.  The SIZE argument is not used by this implementation.

   NOTE(review): N >= 4 * _FP_W_TYPE_SIZE is not handled here; callers
   are presumably expected never to request it.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      /* Collect the words that are discarded entirely.  */ \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
           _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
           ++_FP_FRAC_SRST_4_i) \
        _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
        for (_FP_FRAC_SRST_4_i = 0; \
             _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
             ++_FP_FRAC_SRST_4_i) \
          X##_f[_FP_FRAC_SRST_4_i] \
            = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
        { \
          /* Collect the low bits of the first partially kept word.  */ \
          _FP_FRAC_SRST_4_s \
            |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
          for (_FP_FRAC_SRST_4_i = 0; \
               _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
               ++_FP_FRAC_SRST_4_i) \
            X##_f[_FP_FRAC_SRST_4_i] \
              = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
                  >> _FP_FRAC_SRST_4_down) \
                 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
                    << _FP_FRAC_SRST_4_up)); \
          X##_f[_FP_FRAC_SRST_4_i++] \
            = X##_f[3] >> _FP_FRAC_SRST_4_down; \
        } \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
        X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)
/* Shift X right by N bits with a sticky least significant bit: after
   the shift performed by _FP_FRAC_SRST_4, the sticky flag it reports
   is ORed into word 0 of X.  SIZE is passed through unchanged.  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)
/* The following wrappers spread four-word fractions into individual
   word arguments, most significant word first, and hand them to the
   corresponding __FP_FRAC_*_4 primitive.  */

/* R = X + Y.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X -= Y.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single word.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
/* Word lists (most significant word first) suitable as the second
   argument of _FP_FRAC_SET_4: all zero, the value one, and all bits
   set.  */
#define _FP_ZEROFRAC_4 0, 0, 0, 0
#define _FP_MINFRAC_4 0, 0, 0, 1
#define _FP_MAXFRAC_4 \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Nonzero iff every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X) \
  ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)

/* Nonzero iff the top word of X is negative when viewed as the
   signed word type.  */
#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE) X##_f[3] < 0)

/* Test the overflow bit of format FS in the high word of X.  */
#define _FP_FRAC_OVERP_4(fs, X) \
  (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)

/* Test the double-width high bit of format FS in X.  */
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)

/* Clear the overflow bit of format FS in the high word of X.  */
#define _FP_FRAC_CLEAR_OVERP_4(fs, X) \
  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
/* Nonzero iff the four-word fractions X and Y are equal word for
   word.  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[3] == Y##_f[3] \
   && X##_f[2] == Y##_f[2] \
   && X##_f[1] == Y##_f[1] \
   && X##_f[0] == Y##_f[0])
/* Nonzero iff X > Y.  The fractions are compared word by word from
   the most significant word down; the first differing word decides,
   and equal fractions compare as not greater.  */
#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] \
   : X##_f[0] > Y##_f[0])
/* Nonzero iff X >= Y.  The fractions are compared word by word from
   the most significant word down; the first differing word decides,
   and equal fractions compare as greater-or-equal.  */
#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] \
   : X##_f[0] >= Y##_f[0])
/* Store in R the number of leading zero bits of the four-word
   fraction X.  The most significant nonzero word is handed to
   __FP_CLZ and one word width is added for every all-zero word above
   it.  When words 3..1 are all zero, __FP_CLZ is applied to word 0
   even if that word is zero as well.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
        __FP_CLZ ((R), X##_f[3]); \
      else if (X##_f[2]) \
        { \
          __FP_CLZ ((R), X##_f[2]); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ ((R), X##_f[1]); \
          (R) += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ ((R), X##_f[0]); \
          (R) += _FP_W_TYPE_SIZE*3; \
        } \
    } \
  while (0)
/* Split the floating-point value VAL of format FS into its raw
   fields: the four fraction words X_f[0..3], the exponent X_e and
   the sign X_s.  VAL is copied into a local union _FP_UNION_<fs>
   and the fields are read back through its bit-field view.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)
/* Like _FP_UNPACK_RAW_4, but VAL is a pointer to the floating-point
   object.  The object is accessed in place through a pointer to
   union _FP_UNION_<fs>; no copy is made.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)
/* Build a floating-point value of format FS from the raw fields of
   X (fraction words X_f[0..3], exponent X_e, sign X_s) and assign it
   to VAL.  The fields are stored into a local union _FP_UNION_<fs>
   whose floating-point view is then copied out.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)
/* Like _FP_PACK_RAW_4, but VAL is a pointer to the destination
   object.  The raw fields of X are written in place through a
   pointer to union _FP_UNION_<fs>.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)
295 /* Multiplication algorithms: */
297 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
/* Full four-word by four-word multiplication: R (an eight-word
   fraction) = X * Y.  DOIT (hi, lo, a, b) must compute the two-word
   product of the single words a and b.  The sixteen partial products
   are formed with DOIT and accumulated column by column into R using
   three-word additions, so that the carries of one column land in
   the words of the next.  WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
      \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
/* Four-word multiplication R = X * Y using the one-word multiply
   primitive DOIT.  The eight-word product is formed by
   _FP_MUL_MEAT_DW_4_wide, shifted right (with sticky bit) by
   WFRACBITS - 1, and its four low words are stored in R.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
      \
      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
                              X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
/* Full four-word by four-word multiplication via GMP's mpn_mul_n:
   the eight-word product of X and Y is written to R_f.  The operands
   are taken from the X and Y macro arguments; the previous text
   referred to the fixed names _x_f and _y_f and so ignored its
   arguments.  WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)
/* Four-word multiplication R = X * Y built on the GMP-based
   double-width multiply.  The eight-word product is shifted right
   (with sticky bit) by WFRACBITS - 1 and its four low words are
   stored in R.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
      \
      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
/* Helper utility for _FP_DIV_MEAT_4_udiv:
   pppp = m * nnn.

   Multiply the single word M by the three-word number N2:N1:N0 and
   store the four-word product in P3:P2:P1:P0.  Each one-word product
   comes from umul_ppmm; the low half of every higher product is
   added, with carry, into the word pair accumulated so far.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
486 /* Division algorithms: */
/* Four-word fraction division R = X / Y for format FS, producing one
   quotient word per iteration from the most significant word down.

   If X >= Y, X is first shifted right by one bit (the bit shifted
   out is kept in a side fraction and fed back in later); otherwise
   the result exponent R_e is decremented.  Y is then shifted left by
   _FP_WFRACXBITS_<fs> so that its most significant bit is set.  Each
   iteration estimates a quotient word with udiv_qrnnd from the top
   words, corrects the estimate downwards by at most two, and updates
   the remainder held in X.  After the last word, a nonzero remainder
   sets _FP_WORK_STICKY in R.  X and Y are modified.  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
        { \
          _FP_DIV_MEAT_4_udiv_n_f[3] \
            = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
          _FP_FRAC_SRL_4 (X, 1); \
        } \
      else \
        R##_e--; \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
      \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
        { \
          if (X##_f[3] == Y##_f[3]) \
            { \
              /* This is a special case, not an optimization \
                 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
                 As X## is guaranteed to be < Y, \
                 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
                 (UWtype)-1 or (UWtype)-2.  */ \
              R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                break; \
              __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                               Y##_f[2], Y##_f[1], Y##_f[0], 0, \
                               X##_f[2], X##_f[1], X##_f[0], \
                               _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
              _FP_FRAC_SUB_4 (X, Y, X); \
              if (X##_f[3] > Y##_f[3]) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                } \
            } \
          else \
            { \
              udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
                          X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
              umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
                             _FP_DIV_MEAT_4_udiv_m_f[2], \
                             _FP_DIV_MEAT_4_udiv_m_f[1], \
                             _FP_DIV_MEAT_4_udiv_m_f[0], \
                             R##_f[_FP_DIV_MEAT_4_udiv_i], \
                             Y##_f[2], Y##_f[1], Y##_f[0]); \
              X##_f[2] = X##_f[1]; \
              X##_f[1] = X##_f[0]; \
              X##_f[0] \
                = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
              if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                  if (_FP_FRAC_GE_4 (X, Y) \
                      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                    { \
                      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                      _FP_FRAC_ADD_4 (X, Y, X); \
                    } \
                } \
              _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                { \
                  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
                    R##_f[0] |= _FP_WORK_STICKY; \
                  break; \
                } \
            } \
        } \
    } \
  while (0)
/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root on four-word fractions.  R accumulates the
   result, S and T are working fractions, X is the remaining operand
   (modified) and Q is the current result bit, a word-sized lvalue
   that is shifted right once per step.

   The four loops handle result words 3, 2, 1 and 0 in turn.  In each
   step T = S + Q is compared with X; if T <= X, then S and X are
   updated and the bit Q is added to the current result word.  X is
   shifted left by one bit after every step.  The last loop stops
   when Q reaches _FP_WORK_ROUND.  A nonzero remainder finally sets
   _FP_WORK_STICKY in R, and also _FP_WORK_ROUND if X > S.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f[3] = S##_f[3] + (q); \
          if (T##_f[3] <= X##_f[3]) \
            { \
              S##_f[3] = T##_f[3] + (q); \
              X##_f[3] -= T##_f[3]; \
              R##_f[3] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[2] = S##_f[2] + (q); \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
            { \
              S##_f[2] = T##_f[2] + (q); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
                               T##_f[3], T##_f[2]); \
              R##_f[2] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[1] = S##_f[1] + (q); \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] \
                  && (T##_f[2] < X##_f[2] \
                      || (T##_f[2] == X##_f[2] \
                          && T##_f[1] <= X##_f[1])))) \
            { \
              S##_f[1] = T##_f[1] + (q); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
                               T##_f[3], T##_f[2], T##_f[1]); \
              R##_f[1] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f[0] = S##_f[0] + (q); \
          T##_f[1] = S##_f[1]; \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (_FP_FRAC_GE_4 (X, T)) \
            { \
              S##_f[0] = T##_f[0] + (q); \
              S##_f[1] += (T##_f[0] > S##_f[0]); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              _FP_FRAC_DEC_4 (X, T); \
              R##_f[0] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      if (!_FP_FRAC_ZEROP_4 (X)) \
        { \
          if (_FP_FRAC_GT_4 (X, S)) \
            R##_f[0] |= _FP_WORK_ROUND; \
          R##_f[0] |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
/* Store the four words I3 (most significant) through I0 (least
   significant) into the fraction X.  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = I3, \
   X##_f[2] = I2, \
   X##_f[1] = I1, \
   X##_f[0] = I0)
/* Three-word addition r2:r1:r0 = x2:x1:x0 + y2:y1:y0, written in
   plain C.  A carry out of a word is detected by the sum being
   smaller than the x operand; the carry out of the top word is
   dropped.  A definition made before this point (for instance in
   machine-specific code) takes precedence.  */
#ifndef __FP_FRAC_ADD_3
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_3_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_3_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_3_c1; \
      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif
/* Four-word addition r3:r2:r1:r0 = x3:x2:x1:x0 + y3:y2:y1:y0,
   written in plain C.  A carry out of a word is detected by the sum
   being smaller than the x operand; the carry out of the top word is
   dropped.  A definition made before this point takes precedence.  */
#ifndef __FP_FRAC_ADD_4
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
/* Three-word subtraction r2:r1:r0 = x2:x1:x0 - y2:y1:y0, written in
   plain C.  A borrow out of a word is detected by the difference
   being larger than the x operand; the borrow out of the top word is
   dropped.  A definition made before this point takes precedence.  */
#ifndef __FP_FRAC_SUB_3
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_3_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_3_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_3_c1; \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
    } \
  while (0)
#endif
/* Four-word subtraction r3:r2:r1:r0 = x3:x2:x1:x0 - y3:y2:y1:y0,
   written in plain C.  A borrow out of a word is detected by the
   difference being larger than the x operand; the borrow out of the
   top word is dropped.  A definition made before this point takes
   precedence.  */
#ifndef __FP_FRAC_SUB_4
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_4_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_4_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
      r2 = x2 - y2; \
      __FP_FRAC_SUB_4_c3 = r2 > x2; \
      r2 -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
    } \
  while (0)
#endif
/* Three-word in-place subtraction x2:x1:x0 -= y2:y1:y0.  The old
   value of x is copied to temporaries first, because
   __FP_FRAC_SUB_3 reads its x operand again after it has started
   writing the result.  A definition made before this point takes
   precedence.  */
#ifndef __FP_FRAC_DEC_3
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
                       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
                       y2, y1, y0); \
    } \
  while (0)
#endif
/* Four-word in-place subtraction x3:x2:x1:x0 -= y3:y2:y1:y0.  The
   old value of x is copied to temporaries first, because
   __FP_FRAC_SUB_4 reads its x operand again after it has started
   writing the result.  A definition made before this point takes
   precedence.  */
#ifndef __FP_FRAC_DEC_4
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
                       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
                       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
/* Add the single word I to the four-word number x3:x2:x1:x0 in
   place, rippling the carry upwards one word at a time.  The carry
   out of the top word is dropped.  I is evaluated twice.  A
   definition made before this point takes precedence.  */
#ifndef __FP_FRAC_ADDI_4
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif
783 /* Convert FP values between word sizes. This appears to be more
784 complicated than I'd have expected it to be, so these might be
785 wrong... These macros are in any case somewhat bogus because they
786 use information about what various FRAC_n variables look like
787 internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
788 the ones in op-2.h and op-1.h. */
/* Narrow a four-word fraction S to the one-word fraction D by
   keeping only the least significant word.  */
#define _FP_FRAC_COPY_1_4(D, S) \
  (D##_f = S##_f[0])
/* Narrow a four-word fraction S to the two-word fraction D (words
   D_f0 and D_f1) by keeping the two least significant words.  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)
799 /* Assembly/disassembly for converting to/from integral types.
800 No shifting or overflow handled here. */
801 /* Put the FP value X into r, which is an integer of size rsize. */
/* Assemble the words of the fraction X into the integer lvalue R,
   which is RSIZE bits wide.  One word is used when RSIZE fits in a
   word, two words when it fits in two, and all four otherwise.  The
   conditional around each shift never selects 0 on the paths where
   it is reached; it only keeps a shift by a full word from being
   applied to an R that is a single word wide.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
        (r) = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
        { \
          (r) = X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
      else \
        { \
          /* I'm feeling lazy so we deal with int == 3words \
             (implausible) and int == 4words as a single case.  */ \
          (r) = X##_f[3]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[2]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
    } \
  while (0)
836 /* "No disassemble Number Five!" */
837 /* Move an integer of size rsize into X's fractional part. We rely on
838 the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
839 having to mask the values we store into it. */
/* Spread the integer R, which is RSIZE bits wide, over the four
   words of the fraction X, least significant word first.  Words
   that lie entirely above RSIZE bits are set to zero, which also
   keeps R from being shifted by its full width or more.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)
/* Widen the one-word fraction S to the four-word fraction D: the
   word goes into the least significant position and the three upper
   words are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Widen the two-word fraction S (words S_f0 and S_f1) to the
   four-word fraction D: the two words go into the low positions and
   the two upper words are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Copy between two four-word fractions; same as _FP_FRAC_COPY_4,
   named to fit the _FP_FRAC_COPY_<dst>_<src> family.  */
#define _FP_FRAC_COPY_4_4(D, S) \
  _FP_FRAC_COPY_4 (D, S)
875 #endif /* !SOFT_FP_OP_4_H */