soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  34 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
  35 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
  36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  38 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
  39
  40 #define _FP_FRAC_SLL_2(X, N)                                            \
  41   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  42           ? ({                                                          \
  43               if (__builtin_constant_p (N) && (N) == 1)                 \
  44                 {                                                       \
  45                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
  46                   X##_f0 += X##_f0;                                     \
  47                 }                                                       \
  48               else                                                      \
  49                 {                                                       \
  50                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  51                   X##_f0 <<= (N);                                       \
  52                 }                                                       \
  53               0;                                                        \
  54             })                                                          \
  55           : ({                                                          \
  56               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
  57               X##_f0 = 0;                                               \
  58             }))
  59
  60
  61 #define _FP_FRAC_SRL_2(X, N)                                            \
  62   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  63           ? ({                                                          \
  64               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  65               X##_f1 >>= (N);                                           \
  66             })                                                          \
  67           : ({                                                          \
  68               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
  69               X##_f1 = 0;                                               \
  70             }))
  71
  72 /* Right shift with sticky-lsb.  */
  73 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
  74   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  75           ? ({                                                          \
  76               S = (__builtin_constant_p (N) && (N) == 1                 \
  77                    ? X##_f0 & 1                                         \
  78                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
  79               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  80               X##_f1 >>= (N);                                           \
  81             })                                                          \
  82           : ({                                                          \
  83               S = ((((N) == _FP_W_TYPE_SIZE                             \
  84                      ? 0                                                \
  85                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
  86                     | X##_f0) != 0);                                    \
  87               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
  88               X##_f1 = 0;                                               \
  89             }))
  90
  91 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
  92   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  93           ? ({                                                          \
  94               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
  95                         | (__builtin_constant_p (N) && (N) == 1         \
  96                            ? X##_f0 & 1                                 \
  97                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
  98               X##_f1 >>= (N);                                           \
  99             })                                                          \
 100           : ({                                                          \
 101               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
 102                         | ((((N) == _FP_W_TYPE_SIZE                     \
 103                              ? 0                                        \
 104                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
 105                             | X##_f0) != 0));                           \
 106               X##_f1 = 0;                                               \
 107             }))
 108
 109 #define _FP_FRAC_ADDI_2(X, I)   \
 110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
 111
 112 #define _FP_FRAC_ADD_2(R, X, Y) \
 113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 114
 115 #define _FP_FRAC_SUB_2(R, X, Y) \
 116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 117
 118 #define _FP_FRAC_DEC_2(X, Y)    \
 119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
 120
 121 #define _FP_FRAC_CLZ_2(R, X)                    \
 122   do                                            \
 123     {                                           \
 124       if (X##_f1)                               \
 125         __FP_CLZ (R, X##_f1);                   \
 126       else                                      \
 127         {                                       \
 128           __FP_CLZ (R, X##_f0);                 \
 129           R += _FP_W_TYPE_SIZE;                 \
 130         }                                       \
 131     }                                           \
 132   while (0)
 133
 134 /* Predicates */
 135 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
 136 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
 138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
 139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
 140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
 141 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 142 #define _FP_FRAC_GT_2(X, Y)     \
 143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 144 #define _FP_FRAC_GE_2(X, Y)     \
 145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 146
 147 #define _FP_ZEROFRAC_2          0, 0
 148 #define _FP_MINFRAC_2           0, 1
 149 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
 150
 151 /*
 152  * Internals
 153  */
 154
 155 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
 156
 157 #define __FP_CLZ_2(R, xh, xl)                   \
 158   do                                            \
 159     {                                           \
 160       if (xh)                                   \
 161         __FP_CLZ (R, xh);                       \
 162       else                                      \
 163         {                                       \
 164           __FP_CLZ (R, xl);                     \
 165           R += _FP_W_TYPE_SIZE;                 \
 166         }                                       \
 167     }                                           \
 168   while (0)
 169
 170 #if 0
 171
 172 # ifndef __FP_FRAC_ADDI_2
 173 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
 174   (xh += ((xl += i) < i))
 175 # endif
 176 # ifndef __FP_FRAC_ADD_2
 177 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
 178   (rh = xh + yh + ((rl = xl + yl) < xl))
 179 # endif
 180 # ifndef __FP_FRAC_SUB_2
 181 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
 182   (rh = xh - yh - ((rl = xl - yl) > xl))
 183 # endif
 184 # ifndef __FP_FRAC_DEC_2
 185 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)               \
 186   do                                                    \
 187     {                                                   \
 188       UWtype __FP_FRAC_DEC_2_t = xl;                    \
 189       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);      \
 190     }                                                   \
 191   while (0)
 192 # endif
 193
 194 #else
 195
 196 # undef __FP_FRAC_ADDI_2
 197 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
 198 # undef __FP_FRAC_ADD_2
 199 # define __FP_FRAC_ADD_2                add_ssaaaa
 200 # undef __FP_FRAC_SUB_2
 201 # define __FP_FRAC_SUB_2                sub_ddmmss
 202 # undef __FP_FRAC_DEC_2
 203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
 204   sub_ddmmss (xh, xl, xh, xl, yh, yl)
 205
 206 #endif
 207
 208 /*
 209  * Unpack the raw bits of a native fp value.  Do not classify or
 210  * normalize the data.
 211  */
 212
 213 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 214   do                                                    \
 215     {                                                   \
 216       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;        \
 217       _FP_UNPACK_RAW_2_flo.flt = (val);                 \
 218                                                         \
 219       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;         \
 220       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;         \
 221       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;           \
 222       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;          \
 223     }                                                   \
 224   while (0)
 225
 226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 227   do                                                    \
 228     {                                                   \
 229       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo      \
 230         = (union _FP_UNION_##fs *) (val);               \
 231                                                         \
 232       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;      \
 233       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;      \
 234       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;        \
 235       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;       \
 236     }                                                   \
 237   while (0)
 238
 239
 240 /*
 241  * Repack the raw bits of a native fp value.
 242  */
 243
 244 #define _FP_PACK_RAW_2(fs, val, X)              \
 245   do                                            \
 246     {                                           \
 247       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;  \
 248                                                 \
 249       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;   \
 250       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;   \
 251       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;    \
 252       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;    \
 253                                                 \
 254       (val) = _FP_PACK_RAW_2_flo.flt;           \
 255     }                                           \
 256   while (0)
 257
 258 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 259   do                                                    \
 260     {                                                   \
 261       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo        \
 262         = (union _FP_UNION_##fs *) (val);               \
 263                                                         \
 264       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;        \
 265       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;        \
 266       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;         \
 267       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;         \
 268     }                                                   \
 269   while (0)
 270
 271
 272 /*
 273  * Multiplication algorithms:
 274  */
 275
 276 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 277
 278 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
 279   do                                                                    \
 280     {                                                                   \
 281       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);                       \
 282       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);                       \
 283                                                                         \
 284       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),             \
 285             X##_f0, Y##_f0);                                            \
 286       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,   \
 287             X##_f0, Y##_f1);                                            \
 288       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,   \
 289             X##_f1, Y##_f0);                                            \
 290       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),             \
 291             X##_f1, Y##_f1);                                            \
 292                                                                         \
 293       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 294                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 295                        _FP_MUL_MEAT_DW_2_wide_b_f1,                     \
 296                        _FP_MUL_MEAT_DW_2_wide_b_f0,                     \
 297                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 298                        _FP_FRAC_WORD_4 (R, 1));                         \
 299       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 300                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 301                        _FP_MUL_MEAT_DW_2_wide_c_f1,                     \
 302                        _FP_MUL_MEAT_DW_2_wide_c_f0,                     \
 303                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 304                        _FP_FRAC_WORD_4 (R, 1));                         \
 305     }                                                                   \
 306   while (0)
 307
 308 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 309   do                                                                    \
 310     {                                                                   \
 311       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);                          \
 312                                                                         \
 313       _FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z,         \
 314                               X, Y, doit);                              \
 315                                                                         \
 316       /* Normalize since we know where the msb of the multiplicands     \
 317          were (bit B), we know that the msb of the of the product is    \
 318          at either 2B or 2B-1.  */                                      \
 319       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \
 320       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);              \
 321       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);              \
 322     }                                                                   \
 323   while (0)
 324
 325 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 326    Do only 3 multiplications instead of four. This one is for machines
 327    where multiplication is much more expensive than subtraction.  */
 328
 329 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
 330   do                                                                    \
 331     {                                                                   \
 332       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);                  \
 333       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);                  \
 334       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;                         \
 335       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;                               \
 336       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;                               \
 337                                                                         \
 338       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;               \
 339       _FP_MUL_MEAT_DW_2_wide_3mul_c1                                    \
 340         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;                    \
 341       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;               \
 342       _FP_MUL_MEAT_DW_2_wide_3mul_c2                                    \
 343         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;                    \
 344       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),      \
 345             X##_f0, Y##_f0);                                            \
 346       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),             \
 347             _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,                           \
 348             _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                          \
 349       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                           \
 350             _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);          \
 351                                                                         \
 352       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0                                  \
 353         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;                             \
 354       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1                                  \
 355         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;                             \
 356       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 357                        _FP_FRAC_WORD_4 (R, 1),                          \
 358                        (_FP_MUL_MEAT_DW_2_wide_3mul_c1                  \
 359                         & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,           \
 360                        _FP_MUL_MEAT_DW_2_wide_3mul_d,                   \
 361                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
 362       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 363                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f0);              \
 364       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 365                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);              \
 366       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 367                        _FP_FRAC_WORD_4 (R, 1),                          \
 368                        0, _FP_MUL_MEAT_DW_2_wide_3mul_d,                \
 369                        _FP_FRAC_WORD_4 (R, 0));                         \
 370       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 371                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 372                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 373                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);               \
 374       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 375                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 376                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,                \
 377                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
 378     }                                                                   \
 379   while (0)
 380
 381 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 382   do                                                                    \
 383     {                                                                   \
 384       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);                     \
 385                                                                         \
 386       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits,                           \
 387                                    _FP_MUL_MEAT_2_wide_3mul_z,          \
 388                                    X, Y, doit);                         \
 389                                                                         \
 390       /* Normalize since we know where the msb of the multiplicands     \
 391          were (bit B), we know that the msb of the of the product is    \
 392          at either 2B or 2B-1.  */                                      \
 393       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,                       \
 394                       wfracbits-1, 2*wfracbits);                        \
 395       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);         \
 396       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);         \
 397     }                                                                   \
 398   while (0)
 399
 400 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
 401   do                                                    \
 402     {                                                   \
 403       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];            \
 404       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];            \
 405       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;              \
 406       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;              \
 407       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;              \
 408       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;              \
 409                                                         \
 410       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,        \
 411                  _FP_MUL_MEAT_DW_2_gmp_y, 2);           \
 412     }                                                   \
 413   while (0)
 414
 415 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 416   do                                                                    \
 417     {                                                                   \
 418       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);                           \
 419                                                                         \
 420       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y);    \
 421                                                                         \
 422       /* Normalize since we know where the msb of the multiplicands     \
 423          were (bit B), we know that the msb of the of the product is    \
 424          at either 2B or 2B-1.  */                                      \
 425       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits);  \
 426       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];                               \
 427       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];                               \
 428     }                                                                   \
 429   while (0)
 430
 431 /* Do at most 120x120=240 bits multiplication using double floating
 432    point multiplication.  This is useful if floating point
 433    multiplication has much bigger throughput than integer multiply.
 434    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 435    between 106 and 120 only.
 436    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 437    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 438    towards zero,  RESETFE should optionally reset it back.  */
 439
 440 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
 441   do                                                                    \
 442     {                                                                   \
 443       static const double _const[] =                                    \
 444         {                                                               \
 445           /* 2^-24 */ 5.9604644775390625e-08,                           \
 446           /* 2^-48 */ 3.5527136788005009e-15,                           \
 447           /* 2^-72 */ 2.1175823681357508e-22,                           \
 448           /* 2^-96 */ 1.2621774483536189e-29,                           \
 449           /* 2^28 */ 2.68435456e+08,                                    \
 450           /* 2^4 */ 1.600000e+01,                                       \
 451           /* 2^-20 */ 9.5367431640625e-07,                              \
 452           /* 2^-44 */ 5.6843418860808015e-14,                           \
 453           /* 2^-68 */ 3.3881317890172014e-21,                           \
 454           /* 2^-92 */ 2.0194839173657902e-28,                           \
 455           /* 2^-116 */ 1.2037062152420224e-35                           \
 456         };                                                              \
 457       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
 458         _g240, _h240, _i240, _j240, _k240;                              \
 459       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
 460                                        _p240, _q240, _r240, _s240;      \
 461       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
 462                                                                         \
 463       if (wfracbits < 106 || wfracbits > 120)                           \
 464         abort ();                                                       \
 465                                                                         \
 466       setfetz;                                                          \
 467                                                                         \
 468       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
 469       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
 470       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
 471       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
 472       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
 473       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
 474       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
 475       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
 476       _a240 = (double) (long) (X##_f1 >> 32);                           \
 477       _f240 = (double) (long) (Y##_f1 >> 32);                           \
 478       _e240 *= _const[3];                                               \
 479       _j240 *= _const[3];                                               \
 480       _d240 *= _const[2];                                               \
 481       _i240 *= _const[2];                                               \
 482       _c240 *= _const[1];                                               \
 483       _h240 *= _const[1];                                               \
 484       _b240 *= _const[0];                                               \
 485       _g240 *= _const[0];                                               \
 486       _s240.d =                                                       _e240*_j240; \
 487       _r240.d =                                         _d240*_j240 + _e240*_i240; \
 488       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
 489       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
 490       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
 491       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
 492       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
 493       _l240.d = _a240*_g240 + _b240*_f240;                              \
 494       _k240 =   _a240*_f240;                                            \
 495       _r240.d += _s240.d;                                               \
 496       _q240.d += _r240.d;                                               \
 497       _p240.d += _q240.d;                                               \
 498       _o240.d += _p240.d;                                               \
 499       _n240.d += _o240.d;                                               \
 500       _m240.d += _n240.d;                                               \
 501       _l240.d += _m240.d;                                               \
 502       _k240 += _l240.d;                                                 \
 503       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
 504       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
 505       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
 506       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
 507       _o240.d += _const[7];                                             \
 508       _n240.d += _const[6];                                             \
 509       _m240.d += _const[5];                                             \
 510       _l240.d += _const[4];                                             \
 511       if (_s240.d != 0.0)                                               \
 512         _y240 = 1;                                                      \
 513       if (_r240.d != 0.0)                                               \
 514         _y240 = 1;                                                      \
 515       if (_q240.d != 0.0)                                               \
 516         _y240 = 1;                                                      \
 517       if (_p240.d != 0.0)                                               \
 518         _y240 = 1;                                                      \
 519       _t240 = (DItype) _k240;                                           \
 520       _u240 = _l240.i;                                                  \
 521       _v240 = _m240.i;                                                  \
 522       _w240 = _n240.i;                                                  \
 523       _x240 = _o240.i;                                                  \
 524       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
 525                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
 526       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
 527                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
 528                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
 529                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
 530                 | _y240);                                               \
 531       resetfe;                                                          \
 532     }                                                                   \
 533   while (0)
 534
 535 /*
 536  * Division algorithms:
 537  */
 538
 539 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 540   do                                                                    \
 541     {                                                                   \
 542       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;                              \
 543       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;                              \
 544       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;                              \
 545       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;                              \
 546       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;                              \
 547       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;                              \
 548       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;                              \
 549       if (_FP_FRAC_GE_2 (X, Y))                                         \
 550         {                                                               \
 551           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;                       \
 552           _FP_DIV_MEAT_2_udiv_n_f1                                      \
 553             = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;            \
 554           _FP_DIV_MEAT_2_udiv_n_f0                                      \
 555             = X##_f0 << (_FP_W_TYPE_SIZE - 1);                          \
 556         }                                                               \
 557       else                                                              \
 558         {                                                               \
 559           R##_e--;                                                      \
 560           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;                            \
 561           _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;                            \
 562           _FP_DIV_MEAT_2_udiv_n_f0 = 0;                                 \
 563         }                                                               \
 564                                                                         \
 565       /* Normalize, i.e. make the most significant bit of the           \
 566          denominator set. */                                            \
 567       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
 568                                                                         \
 569       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,                     \
 570                   _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,   \
 571                   Y##_f1);                                              \
 572       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,    \
 573                  R##_f1, Y##_f0);                                       \
 574       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;              \
 575       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
 576         {                                                               \
 577           R##_f1--;                                                     \
 578           _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                     \
 579                           _FP_DIV_MEAT_2_udiv_r);                       \
 580           if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                  \
 581               && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                  \
 582                                 _FP_DIV_MEAT_2_udiv_r))                 \
 583             {                                                           \
 584               R##_f1--;                                                 \
 585               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 586                               _FP_DIV_MEAT_2_udiv_r);                   \
 587             }                                                           \
 588         }                                                               \
 589       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);    \
 590                                                                         \
 591       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)                           \
 592         {                                                               \
 593           /* This is a special case, not an optimization                \
 594              (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).  \
 595              As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,          \
 596              R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we  \
 597              know what kind of bits it is (sticky, guard, round),       \
 598              we don't care.  We also don't care what the reminder is,   \
 599              because the guard bit will be set anyway.  -jj */          \
 600           R##_f0 = -1;                                                  \
 601         }                                                               \
 602       else                                                              \
 603         {                                                               \
 604           udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,                 \
 605                       _FP_DIV_MEAT_2_udiv_r_f1,                         \
 606                       _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);                \
 607           umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,                          \
 608                      _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);         \
 609           _FP_DIV_MEAT_2_udiv_r_f0 = 0;                                 \
 610           if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                     \
 611                              _FP_DIV_MEAT_2_udiv_r))                    \
 612             {                                                           \
 613               R##_f0--;                                                 \
 614               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 615                               _FP_DIV_MEAT_2_udiv_r);                   \
 616               if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)              \
 617                   && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,              \
 618                                     _FP_DIV_MEAT_2_udiv_r))             \
 619                 {                                                       \
 620                   R##_f0--;                                             \
 621                   _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,             \
 622                                   _FP_DIV_MEAT_2_udiv_r);               \
 623                 }                                                       \
 624             }                                                           \
 625           if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,                    \
 626                               _FP_DIV_MEAT_2_udiv_m))                   \
 627             R##_f0 |= _FP_WORK_STICKY;                                  \
 628         }                                                               \
 629     }                                                                   \
 630   while (0)
 631
 632
 633 /*
 634  * Square root algorithms:
 635  * We have just one right now, maybe Newton approximation
 636  * should be added for those machines where division is fast.
 637  */
 638
 639 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
 640   do                                                            \
 641     {                                                           \
 642       while (q)                                                 \
 643         {                                                       \
 644           T##_f1 = S##_f1 + q;                                  \
 645           if (T##_f1 <= X##_f1)                                 \
 646             {                                                   \
 647               S##_f1 = T##_f1 + q;                              \
 648               X##_f1 -= T##_f1;                                 \
 649               R##_f1 += q;                                      \
 650             }                                                   \
 651           _FP_FRAC_SLL_2 (X, 1);                                \
 652           q >>= 1;                                              \
 653         }                                                       \
 654       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);              \
 655       while (q != _FP_WORK_ROUND)                               \
 656         {                                                       \
 657           T##_f0 = S##_f0 + q;                                  \
 658           T##_f1 = S##_f1;                                      \
 659           if (T##_f1 < X##_f1                                   \
 660               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
 661             {                                                   \
 662               S##_f0 = T##_f0 + q;                              \
 663               S##_f1 += (T##_f0 > S##_f0);                      \
 664               _FP_FRAC_DEC_2 (X, T);                            \
 665               R##_f0 += q;                                      \
 666             }                                                   \
 667           _FP_FRAC_SLL_2 (X, 1);                                \
 668           q >>= 1;                                              \
 669         }                                                       \
 670       if (X##_f0 | X##_f1)                                      \
 671         {                                                       \
 672           if (S##_f1 < X##_f1                                   \
 673               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
 674             R##_f0 |= _FP_WORK_ROUND;                           \
 675           R##_f0 |= _FP_WORK_STICKY;                            \
 676         }                                                       \
 677     }                                                           \
 678   while (0)
 679
 680
 681 /*
 682  * Assembly/disassembly for converting to/from integral types.
 683  * No shifting or overflow handled here.
 684  */
 685
 686 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 687   (void) ((rsize <= _FP_W_TYPE_SIZE)            \
 688           ? ({ r = X##_f0; })                   \
 689           : ({                                  \
 690               r = X##_f1;                       \
 691               r <<= _FP_W_TYPE_SIZE;            \
 692               r += X##_f0;                      \
 693             }))
 694
 695 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 696   do                                                                    \
 697     {                                                                   \
 698       X##_f0 = r;                                                       \
 699       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
 700     }                                                                   \
 701   while (0)
 702
 703 /*
 704  * Convert FP values between word sizes
 705  */
 706
 707 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 708
 709 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
 710
 711 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)