1 /* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
3 Copyright (C) 1997-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 In addition to the permissions in the GNU Lesser General Public
12 License, the Free Software Foundation gives you unlimited
13 permission to link the compiled version of this file into
14 combinations with other programs, and to distribute those
15 combinations without any restriction coming from the use of this
16 file. (The Lesser General Public License restrictions do apply in
17 other respects; for example, they cover modification of the file,
18 and distribution when not linked into a combine executable.)
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with the GNU C Library; if not, see
27 <https://www.gnu.org/licenses/>. */
29 #ifndef SOFT_FP_OP_4_H
30 #define SOFT_FP_OP_4_H 1
/* A four-word fraction X lives in the array X_f; word 0 is the least
   significant word and word 3 the most significant one.  */

/* Declare the storage for the four-word fraction X.  */
#define _FP_FRAC_DECL_4(X) \
  _FP_W_TYPE X##_f[4]

/* Copy all four words of the fraction S into the fraction D.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], \
   D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], \
   D##_f[3] = S##_f[3])

/* Set X from I, which is handed on unchanged to __FP_FRAC_SET_4 (so
   it has to expand to the four words that macro expects).  */
#define _FP_FRAC_SET_4(X, I) \
  __FP_FRAC_SET_4 (X, I)

/* Most significant word of X.  */
#define _FP_FRAC_HIGH_4(X) \
  (X##_f[3])

/* Least significant word of X.  */
#define _FP_FRAC_LOW_4(X) \
  (X##_f[0])

/* Word number W of X, counted from the least significant word.  */
#define _FP_FRAC_WORD_4(X, w) \
  (X##_f[w])
/* Shift the four-word fraction X left by N bits in place.  Vacated
   low-order bits become zero; bits moved past word 3 are lost.  N is
   evaluated more than once and is assumed to be non-negative and
   smaller than 4 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      /* Split N into whole words and a residual bit count.  */ \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      if (!_FP_FRAC_SLL_4_up) \
        /* Word-aligned shift: move whole words.  Handling this case \
           separately avoids a right shift by the full word width.  */ \
        for (_FP_FRAC_SLL_4_i = 3; \
             _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
             --_FP_FRAC_SLL_4_i) \
          X##_f[_FP_FRAC_SLL_4_i] \
            = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
        { \
          /* Each result word combines two neighbouring source words.  */ \
          for (_FP_FRAC_SLL_4_i = 3; \
               _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
               --_FP_FRAC_SLL_4_i) \
            X##_f[_FP_FRAC_SLL_4_i] \
              = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
                  << _FP_FRAC_SLL_4_up) \
                 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
                    >> _FP_FRAC_SLL_4_down)); \
          /* The lowest surviving word has no lower neighbour.  */ \
          X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
        } \
      /* Clear the words below the shifted data.  */ \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
        X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)
72 /* This one was broken too. */
/* Shift the four-word fraction X right (logically) by N bits in
   place.  Vacated high-order bits become zero; bits moved below word 0
   are lost.  N is evaluated more than once and is assumed to be
   non-negative and smaller than 4 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      /* Split N into whole words and a residual bit count.  */ \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      if (!_FP_FRAC_SRL_4_down) \
        /* Word-aligned shift: move whole words.  Handling this case \
           separately avoids a left shift by the full word width.  */ \
        for (_FP_FRAC_SRL_4_i = 0; \
             _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
             ++_FP_FRAC_SRL_4_i) \
          X##_f[_FP_FRAC_SRL_4_i] \
            = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
        { \
          /* Each result word combines two neighbouring source words.  */ \
          for (_FP_FRAC_SRL_4_i = 0; \
               _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
               ++_FP_FRAC_SRL_4_i) \
            X##_f[_FP_FRAC_SRL_4_i] \
              = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
                  >> _FP_FRAC_SRL_4_down) \
                 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
                    << _FP_FRAC_SRL_4_up)); \
          /* The highest surviving word has no upper neighbour.  */ \
          X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
        } \
      /* Clear the words above the shifted data.  */ \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
        X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)
/* Right shift with sticky reporting.
   Shift the four-word fraction X right by N bits exactly as a logical
   right shift would, and store in S the value 1 if any of the bits
   that fell off the right hand side was set, 0 otherwise.  X itself
   is not modified beyond the shift; the caller decides what to do
   with S.  SIZE is not used here.  N is evaluated more than once and
   is assumed to be non-negative and smaller than
   4 * _FP_W_TYPE_SIZE.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      /* Collect the words that are shifted out completely.  */ \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
           _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
           ++_FP_FRAC_SRST_4_i) \
        _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
        for (_FP_FRAC_SRST_4_i = 0; \
             _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
             ++_FP_FRAC_SRST_4_i) \
          X##_f[_FP_FRAC_SRST_4_i] \
            = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
        { \
          /* Here the index equals the skip count: add the low bits of \
             the first partially shifted word to the sticky value.  */ \
          _FP_FRAC_SRST_4_s \
            |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
          for (_FP_FRAC_SRST_4_i = 0; \
               _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
               ++_FP_FRAC_SRST_4_i) \
            X##_f[_FP_FRAC_SRST_4_i] \
              = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
                  >> _FP_FRAC_SRST_4_down) \
                 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
                    << _FP_FRAC_SRST_4_up)); \
          X##_f[_FP_FRAC_SRST_4_i++] \
            = X##_f[3] >> _FP_FRAC_SRST_4_down; \
        } \
      /* Clear the words above the shifted data.  */ \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
        X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)
/* Right shift with sticky-lsb: shift X right by N bits with
   _FP_FRAC_SRST_4 and then OR the sticky flag it reports into the
   least significant word of X.  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)
/* The arithmetic entry points below only spell out the four words of
   each operand, most significant word first, and hand them to the
   word-level __FP_FRAC_*_4 primitives.  */

/* R = X + Y.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X -= Y.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single word.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
/* Four-word constants, written as comma-separated word lists with the
   most significant word first.  */
#define _FP_ZEROFRAC_4 0, 0, 0, 0
#define _FP_MINFRAC_4 0, 0, 0, 1
#define _FP_MAXFRAC_4 \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Nonzero iff every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X) \
  ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)

/* Nonzero iff the high word of X is negative when viewed as the
   signed word type.  */
#define _FP_FRAC_NEGP_4(X) \
  (((_FP_WS_TYPE) X##_f[3]) < 0)

/* Nonzero iff the overflow bit of format FS is set in X.  */
#define _FP_FRAC_OVERP_4(fs, X) \
  (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)

/* The double-width high bit of format FS, masked out of X.  */
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)

/* Clear the overflow bit of format FS in X.  */
#define _FP_FRAC_CLEAR_OVERP_4(fs, X) \
  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
/* Nonzero iff X and Y agree in all four words.  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[3] == Y##_f[3] \
   && X##_f[2] == Y##_f[2] \
   && X##_f[1] == Y##_f[1] \
   && X##_f[0] == Y##_f[0])

/* Nonzero iff X > Y.  The comparison is decided by the most
   significant word in which the two fractions differ.  */
#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] \
   : X##_f[0] > Y##_f[0])

/* Nonzero iff X >= Y; same scheme as _FP_FRAC_GT_4, except that equal
   fractions compare true.  */
#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3] \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2] \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1] \
   : X##_f[0] >= Y##_f[0])
/* Store in R the result of __FP_CLZ applied to the most significant
   nonzero word of X, plus one word width for every all-zero word
   above it.  Assuming __FP_CLZ (r, w) yields the leading-zero count of
   the word w, R is the leading-zero count of the whole fraction.  If
   X is entirely zero, __FP_CLZ is applied to the (zero) low word.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
        __FP_CLZ ((R), X##_f[3]); \
      else if (X##_f[2]) \
        { \
          __FP_CLZ ((R), X##_f[2]); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ ((R), X##_f[1]); \
          (R) += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ ((R), X##_f[0]); \
          (R) += _FP_W_TYPE_SIZE*3; \
        } \
    } \
  while (0)
/* Split the floating-point value VAL of format FS into its raw
   fields: the four fraction words go to X_f[0..3], the exponent field
   to X_e and the sign field to X_s.  The fields are read through the
   bits view of union _FP_UNION_<fs>; no bias or implicit-bit handling
   happens here.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)
/* Like _FP_UNPACK_RAW_4, but VAL is a pointer to the floating-point
   object; the fields are read in place through a pointer to
   union _FP_UNION_<fs>.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
 \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)
/* Assemble a floating-point value of format FS from the raw fields of
   X (fraction words X_f[0..3], exponent X_e, sign X_s) and assign it
   to VAL.  The fields are stored through the bits view of
   union _FP_UNION_<fs> and the result is read back through its flt
   member.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)
/* Like _FP_PACK_RAW_4, but VAL is a pointer to the destination
   object; the fields are written in place through a pointer to
   union _FP_UNION_<fs>.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)
291 /* Multiplication algorithms: */
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   R, an eight-word fraction, receives the full product of the
   four-word fractions X and Y.  DOIT (hi, lo, a, b) must store the
   two-word product of the words a and b in hi:lo.  The sixteen
   partial products are accumulated column by column, from the least
   significant column upwards; the first addition into each column
   passes 0 for the not yet written top word of the three-word
   window.  WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
 \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
/* Multiply the four-word fractions X and Y into the four-word
   fraction R: form the eight-word product with
   _FP_MUL_MEAT_DW_4_wide, shift it right by WFRACBITS-1 bits with
   sticky-lsb (_FP_FRAC_SRS_8) and keep the low four words.  DOIT is
   the 1W * 1W => 2W primitive handed on to the double-width
   multiply.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
 \
      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
                              X, Y, doit); \
 \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
/* Double-width multiply through GMP: store the eight-word product of
   the four-word fractions X and Y in R by handing the three word
   arrays to mpn_mul_n.  The operands are the macro arguments' own
   word arrays (X_f and Y_f); the earlier text named the identifiers
   _x_f and _y_f, which nothing in this macro declares.  WFRACBITS is
   not used here.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)
/* Multiply the four-word fractions X and Y into the four-word
   fraction R: form the eight-word product with
   _FP_MUL_MEAT_DW_4_gmp, shift it right by WFRACBITS-1 bits with
   sticky-lsb (_FP_FRAC_SRS_8) and keep the low four words.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
 \
      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
 \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
/* Helper utility for _FP_DIV_MEAT_4_udiv:
   pppp = m * nnn, i.e. multiply the three-word number n2:n1:n0 by the
   single word M and store the four-word product in p3:p2:p1:p0.  Each
   word product comes from umul_ppmm; its low half is added into the
   two result words above it with __FP_FRAC_ADDI_2.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
482 /* Division algorithms: */
/* Divide the four-word fraction X by the four-word fraction Y of
   format FS and store the four quotient words in R, producing one
   quotient word per iteration from word 3 down to word 0.  Each word
   is estimated with udiv_qrnnd on the high words and then corrected
   downwards by at most two.  _FP_WORK_STICKY is ORed into R_f[0] when
   a nonzero remainder is left.  X and Y are clobbered.  If X < Y on
   entry the result exponent R_e is decremented; otherwise X is first
   shifted right by one bit (the bit shifted out is kept and brought
   back in during the first iteration).  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
        { \
          _FP_DIV_MEAT_4_udiv_n_f[3] \
            = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
          _FP_FRAC_SRL_4 (X, 1); \
        } \
      else \
        R##_e--; \
 \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
 \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
        { \
          if (X##_f[3] == Y##_f[3]) \
            { \
              /* This is a special case, not an optimization \
                 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
                 As X## is guaranteed to be < Y, \
                 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
                 (UWtype)-1 or (UWtype)-2.  */ \
              R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                break; \
              __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                               Y##_f[2], Y##_f[1], Y##_f[0], 0, \
                               X##_f[2], X##_f[1], X##_f[0], \
                               _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
              _FP_FRAC_SUB_4 (X, Y, X); \
              if (X##_f[3] > Y##_f[3]) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                } \
            } \
          else \
            { \
              /* Estimate the quotient word from the two high words of \
                 X, then form estimate * (low three words of Y).  */ \
              udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
                          X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
              umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
                             _FP_DIV_MEAT_4_udiv_m_f[2], \
                             _FP_DIV_MEAT_4_udiv_m_f[1], \
                             _FP_DIV_MEAT_4_udiv_m_f[0], \
                             R##_f[_FP_DIV_MEAT_4_udiv_i], \
                             Y##_f[2], Y##_f[1], Y##_f[0]); \
              /* Move the remainder up by one word and bring in the \
                 next word of the numerator tail.  */ \
              X##_f[2] = X##_f[1]; \
              X##_f[1] = X##_f[0]; \
              X##_f[0] \
                = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
              if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                { \
                  /* The estimate was too large; correct it.  */ \
                  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                  if (_FP_FRAC_GE_4 (X, Y) \
                      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                    { \
                      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                      _FP_FRAC_ADD_4 (X, Y, X); \
                    } \
                } \
              _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                { \
                  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
                    R##_f[0] |= _FP_WORK_STICKY; \
                  break; \
                } \
            } \
        } \
    } \
  while (0)
565 /* Square root algorithms:
566 We have just one right now, maybe Newton approximation
567 should be added for those machines where division is fast. */
/* Bit-by-bit square root step for four-word fractions.  The result
   bits are accumulated in R; S holds the running value that each
   trial is built from, T is scratch for the trial and X is the
   remainder, which is consumed.  Q is the single-bit mask of the
   first result bit to try in word 3; it is modified.  The words are
   processed from the most significant one down; for every bit
   position the trial T = S + q is compared with the remainder, and
   when it fits it is subtracted and the bit is set in R.  In the low
   word the loop stops at the _FP_WORK_ROUND position.  Finally, if
   the remainder is nonzero, _FP_WORK_STICKY is set in R_f[0], and
   _FP_WORK_ROUND too when the remainder exceeds S.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f[3] = S##_f[3] + (q); \
          if (T##_f[3] <= X##_f[3]) \
            { \
              S##_f[3] = T##_f[3] + (q); \
              X##_f[3] -= T##_f[3]; \
              R##_f[3] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[2] = S##_f[2] + (q); \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
            { \
              S##_f[2] = T##_f[2] + (q); \
              /* Propagate the carry out of the lower word.  */ \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
                               T##_f[3], T##_f[2]); \
              R##_f[2] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[1] = S##_f[1] + (q); \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] \
                  && (T##_f[2] < X##_f[2] \
                      || (T##_f[2] == X##_f[2] \
                          && T##_f[1] <= X##_f[1])))) \
            { \
              S##_f[1] = T##_f[1] + (q); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
                               T##_f[3], T##_f[2], T##_f[1]); \
              R##_f[1] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f[0] = S##_f[0] + (q); \
          T##_f[1] = S##_f[1]; \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (_FP_FRAC_GE_4 (X, T)) \
            { \
              S##_f[0] = T##_f[0] + (q); \
              S##_f[1] += (T##_f[0] > S##_f[0]); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              _FP_FRAC_DEC_4 (X, T); \
              R##_f[0] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      if (!_FP_FRAC_ZEROP_4 (X)) \
        { \
          if (_FP_FRAC_GT_4 (X, S)) \
            R##_f[0] |= _FP_WORK_ROUND; \
          R##_f[0] |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
/* Store the four words I3 (most significant) ... I0 (least
   significant) in the fraction X.  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = I3, \
   X##_f[2] = I2, \
   X##_f[1] = I1, \
   X##_f[0] = I0)
#ifndef __FP_FRAC_ADD_3
/* Three-word addition r2:r1:r0 = x2:x1:x0 + y2:y1:y0 with carry
   propagation between the words; a carry out of the top word is
   lost.  A carry is detected by the wrapped sum being smaller than
   the X operand, so the result words may alias the Y operand words.
   A port may supply its own definition before this header is
   read.  */
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_3_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_3_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_3_c1; \
      /* Adding the incoming carry may itself wrap the word.  */ \
      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_ADD_4
/* Four-word addition r3:r2:r1:r0 = x3:x2:x1:x0 + y3:y2:y1:y0 with
   carry propagation between the words; a carry out of the top word is
   lost.  A carry is detected by the wrapped sum being smaller than
   the X operand, so the result words may alias the Y operand words.
   A port may supply its own definition before this header is
   read.  */
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      /* Adding the incoming carry may itself wrap the word.  */ \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_SUB_3
/* Three-word subtraction r2:r1:r0 = x2:x1:x0 - y2:y1:y0 with borrow
   propagation between the words; a borrow out of the top word is
   lost.  The low two result words are built in temporaries and only
   stored at the end, so the result words may alias the operand
   words.  A port may supply its own definition before this header is
   read.  */
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_tmp[2]; \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      __FP_FRAC_SUB_3_tmp[0] = x0 - y0; \
      /* A wrapped difference is larger than the minuend.  */ \
      __FP_FRAC_SUB_3_c1 = __FP_FRAC_SUB_3_tmp[0] > x0; \
      __FP_FRAC_SUB_3_tmp[1] = x1 - y1; \
      __FP_FRAC_SUB_3_c2 = __FP_FRAC_SUB_3_tmp[1] > x1; \
      __FP_FRAC_SUB_3_tmp[1] -= __FP_FRAC_SUB_3_c1; \
      /* Taking the incoming borrow from a zero difference borrows \
         again.  */ \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
      r1 = __FP_FRAC_SUB_3_tmp[1]; \
      r0 = __FP_FRAC_SUB_3_tmp[0]; \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_SUB_4
/* Four-word subtraction r3:r2:r1:r0 = x3:x2:x1:x0 - y3:y2:y1:y0 with
   borrow propagation between the words; a borrow out of the top word
   is lost.  The low three result words are built in temporaries and
   only stored at the end, so the result words may alias the operand
   words.  A port may supply its own definition before this header is
   read.  */
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_tmp[3]; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      __FP_FRAC_SUB_4_tmp[0] = x0 - y0; \
      /* A wrapped difference is larger than the minuend.  */ \
      __FP_FRAC_SUB_4_c1 = __FP_FRAC_SUB_4_tmp[0] > x0; \
      __FP_FRAC_SUB_4_tmp[1] = x1 - y1; \
      __FP_FRAC_SUB_4_c2 = __FP_FRAC_SUB_4_tmp[1] > x1; \
      __FP_FRAC_SUB_4_tmp[1] -= __FP_FRAC_SUB_4_c1; \
      /* Taking the incoming borrow from a zero difference borrows \
         again.  */ \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
      __FP_FRAC_SUB_4_tmp[2] = x2 - y2; \
      __FP_FRAC_SUB_4_c3 = __FP_FRAC_SUB_4_tmp[2] > x2; \
      __FP_FRAC_SUB_4_tmp[2] -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
      r2 = __FP_FRAC_SUB_4_tmp[2]; \
      r1 = __FP_FRAC_SUB_4_tmp[1]; \
      r0 = __FP_FRAC_SUB_4_tmp[0]; \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_DEC_3
/* In-place three-word subtraction x2:x1:x0 -= y2:y1:y0.  The X words
   are copied to temporaries first and the work is done by
   __FP_FRAC_SUB_3.  A port may supply its own definition before this
   header is read.  */
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
                       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
                       y2, y1, y0); \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_DEC_4
/* In-place four-word subtraction x3:x2:x1:x0 -= y3:y2:y1:y0.  The X
   words are copied to temporaries first and the work is done by
   __FP_FRAC_SUB_4.  A port may supply its own definition before this
   header is read.  */
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
                       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
                       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
#ifndef __FP_FRAC_ADDI_4
/* Add the single word I to the four-word number x3:x2:x1:x0 in place,
   rippling the carry upwards; a carry out of the top word is lost.
   I is evaluated twice.  A port may supply its own definition before
   this header is read.  */
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      /* A wrapped sum is smaller than the addend.  */ \
      __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif
786 /* Convert FP values between word sizes. This appears to be more
787 complicated than I'd have expected it to be, so these might be
788 wrong... These macros are in any case somewhat bogus because they
789 use information about what various FRAC_n variables look like
790 internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
791 the ones in op-2.h and op-1.h. */
/* Narrow the four-word fraction S to the one-word fraction D by
   keeping only the least significant word.  */
#define _FP_FRAC_COPY_1_4(D, S) \
  (D##_f = S##_f[0])
/* Narrow the four-word fraction S to the two-word fraction D (words
   D_f0 and D_f1) by keeping the two least significant words.  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)
802 /* Assembly/disassembly for converting to/from integral types.
803 No shifting or overflow handled here. */
804 /* Put the FP value X into r, which is an integer of size rsize. */
/* Put the FP value X into r, which is an integer of size rsize bits.
   Depending on RSIZE one, two or all four fraction words are
   combined, most significant word first.  Every shift is guarded by
   a test of RSIZE so that no shift by the full width of R is ever
   evaluated.  No shifting of the value or overflow handling is done
   here.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
        (r) = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
        { \
          (r) = X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
      else \
        { \
          /* I'm feeling lazy so we deal with int == 3words \
             (implausible) and int == 4words as a single case.  */ \
          (r) = X##_f[3]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[2]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
    } \
  while (0)
839 /* "No disassemble Number Five!" */
840 /* Move an integer of size rsize into X's fractional part. We rely on
841 the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
842 having to mask the values we store into it. */
/* Spread the integer R of size RSIZE bits over the four words of X:
   word k receives R shifted right by k words, or 0 when RSIZE shows
   that R has no bits that high (which also keeps any shift by the
   full width of R from being evaluated).  The stores truncate to the
   word type, so no masking is done.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)
/* Widen the one-word fraction S to the four-word fraction D: the
   word goes to the least significant position and the three upper
   words are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Widen the two-word fraction S (words S_f0 and S_f1) to the
   four-word fraction D: the two words go to the two least significant
   positions and the two upper words are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Same-width copy: four words to four words is simply
   _FP_FRAC_COPY_4.  */
#define _FP_FRAC_COPY_4_4(D, S) \
  _FP_FRAC_COPY_4 (D, S)
878 #endif /* !SOFT_FP_OP_4_H */