soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  34 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
  35 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
  36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  38 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
  39
  40 #define _FP_FRAC_SLL_2(X, N)                                            \
  41   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  42           ? ({                                                          \
  43               if (__builtin_constant_p (N) && (N) == 1)                 \
  44                 {                                                       \
  45                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
  46                   X##_f0 += X##_f0;                                     \
  47                 }                                                       \
  48               else                                                      \
  49                 {                                                       \
  50                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  51                   X##_f0 <<= (N);                                       \
  52                 }                                                       \
  53               0;                                                        \
  54             })                                                          \
  55           : ({                                                          \
  56               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
  57               X##_f0 = 0;                                               \
  58             }))
  59
  60
  61 #define _FP_FRAC_SRL_2(X, N)                                            \
  62   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  63           ? ({                                                          \
  64               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  65               X##_f1 >>= (N);                                           \
  66             })                                                          \
  67           : ({                                                          \
  68               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
  69               X##_f1 = 0;                                               \
  70             }))
  71
  72 /* Right shift with sticky-lsb.  */
  73 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
  74   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  75           ? ({                                                          \
  76               S = (__builtin_constant_p (N) && (N) == 1                 \
  77                    ? X##_f0 & 1                                         \
  78                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
  79               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  80               X##_f1 >>= (N);                                           \
  81             })                                                          \
  82           : ({                                                          \
  83               S = ((((N) == _FP_W_TYPE_SIZE                             \
  84                      ? 0                                                \
  85                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
  86                     | X##_f0) != 0);                                    \
  87               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
  88               X##_f1 = 0;                                               \
  89             }))
  90
  91 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
  92   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  93           ? ({                                                          \
  94               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
  95                         | (__builtin_constant_p (N) && (N) == 1         \
  96                            ? X##_f0 & 1                                 \
  97                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
  98               X##_f1 >>= (N);                                           \
  99             })                                                          \
 100           : ({                                                          \
 101               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
 102                         | ((((N) == _FP_W_TYPE_SIZE                     \
 103                              ? 0                                        \
 104                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
 105                             | X##_f0) != 0));                           \
 106               X##_f1 = 0;                                               \
 107             }))
 108
 109 #define _FP_FRAC_ADDI_2(X, I)   \
 110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
 111
 112 #define _FP_FRAC_ADD_2(R, X, Y) \
 113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 114
 115 #define _FP_FRAC_SUB_2(R, X, Y) \
 116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 117
 118 #define _FP_FRAC_DEC_2(X, Y)    \
 119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
 120
 121 #define _FP_FRAC_CLZ_2(R, X)                    \
 122   do                                            \
 123     {                                           \
 124       if (X##_f1)                               \
 125         __FP_CLZ (R, X##_f1);                   \
 126       else                                      \
 127         {                                       \
 128           __FP_CLZ (R, X##_f0);                 \
 129           R += _FP_W_TYPE_SIZE;                 \
 130         }                                       \
 131     }                                           \
 132   while (0)
 133
 134 /* Predicates.  */
 135 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
 136 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
 138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
 139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
 140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
 141 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 142 #define _FP_FRAC_GT_2(X, Y)     \
 143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 144 #define _FP_FRAC_GE_2(X, Y)     \
 145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 146
 147 #define _FP_ZEROFRAC_2          0, 0
 148 #define _FP_MINFRAC_2           0, 1
 149 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
 150
 151 /* Internals.  */
 152
 153 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
 154
 155 #define __FP_CLZ_2(R, xh, xl)                   \
 156   do                                            \
 157     {                                           \
 158       if (xh)                                   \
 159         __FP_CLZ (R, xh);                       \
 160       else                                      \
 161         {                                       \
 162           __FP_CLZ (R, xl);                     \
 163           R += _FP_W_TYPE_SIZE;                 \
 164         }                                       \
 165     }                                           \
 166   while (0)
 167
 168 #if 0
 169
 170 # ifndef __FP_FRAC_ADDI_2
 171 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
 172   (xh += ((xl += i) < i))
 173 # endif
 174 # ifndef __FP_FRAC_ADD_2
 175 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
 176   (rh = xh + yh + ((rl = xl + yl) < xl))
 177 # endif
 178 # ifndef __FP_FRAC_SUB_2
 179 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
 180   (rh = xh - yh - ((rl = xl - yl) > xl))
 181 # endif
 182 # ifndef __FP_FRAC_DEC_2
 183 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)               \
 184   do                                                    \
 185     {                                                   \
 186       UWtype __FP_FRAC_DEC_2_t = xl;                    \
 187       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);      \
 188     }                                                   \
 189   while (0)
 190 # endif
 191
 192 #else
 193
 194 # undef __FP_FRAC_ADDI_2
 195 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
 196 # undef __FP_FRAC_ADD_2
 197 # define __FP_FRAC_ADD_2                add_ssaaaa
 198 # undef __FP_FRAC_SUB_2
 199 # define __FP_FRAC_SUB_2                sub_ddmmss
 200 # undef __FP_FRAC_DEC_2
 201 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
 202   sub_ddmmss (xh, xl, xh, xl, yh, yl)
 203
 204 #endif
 205
 206 /* Unpack the raw bits of a native fp value.  Do not classify or
 207    normalize the data.  */
 208
 209 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 210   do                                                    \
 211     {                                                   \
 212       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;        \
 213       _FP_UNPACK_RAW_2_flo.flt = (val);                 \
 214                                                         \
 215       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;         \
 216       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;         \
 217       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;           \
 218       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;          \
 219     }                                                   \
 220   while (0)
 221
 222 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 223   do                                                    \
 224     {                                                   \
 225       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo      \
 226         = (union _FP_UNION_##fs *) (val);               \
 227                                                         \
 228       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;      \
 229       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;      \
 230       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;        \
 231       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;       \
 232     }                                                   \
 233   while (0)
 234
 235
 236 /* Repack the raw bits of a native fp value.  */
 237
 238 #define _FP_PACK_RAW_2(fs, val, X)              \
 239   do                                            \
 240     {                                           \
 241       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;  \
 242                                                 \
 243       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;   \
 244       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;   \
 245       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;    \
 246       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;    \
 247                                                 \
 248       (val) = _FP_PACK_RAW_2_flo.flt;           \
 249     }                                           \
 250   while (0)
 251
 252 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 253   do                                                    \
 254     {                                                   \
 255       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo        \
 256         = (union _FP_UNION_##fs *) (val);               \
 257                                                         \
 258       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;        \
 259       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;        \
 260       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;         \
 261       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;         \
 262     }                                                   \
 263   while (0)
 264
 265
 266 /* Multiplication algorithms: */
 267
 268 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 269
 270 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
 271   do                                                                    \
 272     {                                                                   \
 273       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);                       \
 274       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);                       \
 275                                                                         \
 276       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),             \
 277             X##_f0, Y##_f0);                                            \
 278       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,   \
 279             X##_f0, Y##_f1);                                            \
 280       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,   \
 281             X##_f1, Y##_f0);                                            \
 282       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),             \
 283             X##_f1, Y##_f1);                                            \
 284                                                                         \
 285       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 286                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 287                        _FP_MUL_MEAT_DW_2_wide_b_f1,                     \
 288                        _FP_MUL_MEAT_DW_2_wide_b_f0,                     \
 289                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 290                        _FP_FRAC_WORD_4 (R, 1));                         \
 291       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 292                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 293                        _FP_MUL_MEAT_DW_2_wide_c_f1,                     \
 294                        _FP_MUL_MEAT_DW_2_wide_c_f0,                     \
 295                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 296                        _FP_FRAC_WORD_4 (R, 1));                         \
 297     }                                                                   \
 298   while (0)
 299
 300 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 301   do                                                                    \
 302     {                                                                   \
 303       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);                          \
 304                                                                         \
 305       _FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z,         \
 306                               X, Y, doit);                              \
 307                                                                         \
 308       /* Normalize since we know where the msb of the multiplicands     \
 309          were (bit B), we know that the msb of the of the product is    \
 310          at either 2B or 2B-1.  */                                      \
 311       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \
 312       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);              \
 313       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);              \
 314     }                                                                   \
 315   while (0)
 316
 317 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 318    Do only 3 multiplications instead of four. This one is for machines
 319    where multiplication is much more expensive than subtraction.  */
 320
 321 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
 322   do                                                                    \
 323     {                                                                   \
 324       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);                  \
 325       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);                  \
 326       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;                         \
 327       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;                               \
 328       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;                               \
 329                                                                         \
 330       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;               \
 331       _FP_MUL_MEAT_DW_2_wide_3mul_c1                                    \
 332         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;                    \
 333       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;               \
 334       _FP_MUL_MEAT_DW_2_wide_3mul_c2                                    \
 335         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;                    \
 336       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),      \
 337             X##_f0, Y##_f0);                                            \
 338       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),             \
 339             _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,                           \
 340             _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                          \
 341       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                           \
 342             _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);          \
 343                                                                         \
 344       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0                                  \
 345         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;                             \
 346       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1                                  \
 347         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;                             \
 348       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 349                        _FP_FRAC_WORD_4 (R, 1),                          \
 350                        (_FP_MUL_MEAT_DW_2_wide_3mul_c1                  \
 351                         & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,           \
 352                        _FP_MUL_MEAT_DW_2_wide_3mul_d,                   \
 353                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
 354       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 355                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f0);              \
 356       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 357                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);              \
 358       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 359                        _FP_FRAC_WORD_4 (R, 1),                          \
 360                        0, _FP_MUL_MEAT_DW_2_wide_3mul_d,                \
 361                        _FP_FRAC_WORD_4 (R, 0));                         \
 362       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 363                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
 364                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 365                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);               \
 366       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 367                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
 368                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,                \
 369                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
 370     }                                                                   \
 371   while (0)
 372
 373 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 374   do                                                                    \
 375     {                                                                   \
 376       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);                     \
 377                                                                         \
 378       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits,                           \
 379                                    _FP_MUL_MEAT_2_wide_3mul_z,          \
 380                                    X, Y, doit);                         \
 381                                                                         \
 382       /* Normalize since we know where the msb of the multiplicands     \
 383          were (bit B), we know that the msb of the of the product is    \
 384          at either 2B or 2B-1.  */                                      \
 385       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,                       \
 386                       wfracbits-1, 2*wfracbits);                        \
 387       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);         \
 388       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);         \
 389     }                                                                   \
 390   while (0)
 391
 392 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
 393   do                                                    \
 394     {                                                   \
 395       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];            \
 396       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];            \
 397       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;              \
 398       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;              \
 399       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;              \
 400       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;              \
 401                                                         \
 402       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,        \
 403                  _FP_MUL_MEAT_DW_2_gmp_y, 2);           \
 404     }                                                   \
 405   while (0)
 406
 407 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 408   do                                                                    \
 409     {                                                                   \
 410       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);                           \
 411                                                                         \
 412       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y);    \
 413                                                                         \
 414       /* Normalize since we know where the msb of the multiplicands     \
 415          were (bit B), we know that the msb of the of the product is    \
 416          at either 2B or 2B-1.  */                                      \
 417       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits);  \
 418       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];                               \
 419       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];                               \
 420     }                                                                   \
 421   while (0)
 422
 423 /* Do at most 120x120=240 bits multiplication using double floating
 424    point multiplication.  This is useful if floating point
 425    multiplication has much bigger throughput than integer multiply.
 426    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 427    between 106 and 120 only.
 428    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 429    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 430    towards zero,  RESETFE should optionally reset it back.  */
 431
 432 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
 433   do                                                                    \
 434     {                                                                   \
 435       static const double _const[] =                                    \
 436         {                                                               \
 437           /* 2^-24 */ 5.9604644775390625e-08,                           \
 438           /* 2^-48 */ 3.5527136788005009e-15,                           \
 439           /* 2^-72 */ 2.1175823681357508e-22,                           \
 440           /* 2^-96 */ 1.2621774483536189e-29,                           \
 441           /* 2^28 */ 2.68435456e+08,                                    \
 442           /* 2^4 */ 1.600000e+01,                                       \
 443           /* 2^-20 */ 9.5367431640625e-07,                              \
 444           /* 2^-44 */ 5.6843418860808015e-14,                           \
 445           /* 2^-68 */ 3.3881317890172014e-21,                           \
 446           /* 2^-92 */ 2.0194839173657902e-28,                           \
 447           /* 2^-116 */ 1.2037062152420224e-35                           \
 448         };                                                              \
 449       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
 450         _g240, _h240, _i240, _j240, _k240;                              \
 451       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
 452                                        _p240, _q240, _r240, _s240;      \
 453       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
 454                                                                         \
 455       if (wfracbits < 106 || wfracbits > 120)                           \
 456         abort ();                                                       \
 457                                                                         \
 458       setfetz;                                                          \
 459                                                                         \
 460       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
 461       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
 462       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
 463       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
 464       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
 465       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
 466       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
 467       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
 468       _a240 = (double) (long) (X##_f1 >> 32);                           \
 469       _f240 = (double) (long) (Y##_f1 >> 32);                           \
 470       _e240 *= _const[3];                                               \
 471       _j240 *= _const[3];                                               \
 472       _d240 *= _const[2];                                               \
 473       _i240 *= _const[2];                                               \
 474       _c240 *= _const[1];                                               \
 475       _h240 *= _const[1];                                               \
 476       _b240 *= _const[0];                                               \
 477       _g240 *= _const[0];                                               \
 478       _s240.d =                                                       _e240*_j240; \
 479       _r240.d =                                         _d240*_j240 + _e240*_i240; \
 480       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
 481       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
 482       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
 483       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
 484       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
 485       _l240.d = _a240*_g240 + _b240*_f240;                              \
 486       _k240 =   _a240*_f240;                                            \
 487       _r240.d += _s240.d;                                               \
 488       _q240.d += _r240.d;                                               \
 489       _p240.d += _q240.d;                                               \
 490       _o240.d += _p240.d;                                               \
 491       _n240.d += _o240.d;                                               \
 492       _m240.d += _n240.d;                                               \
 493       _l240.d += _m240.d;                                               \
 494       _k240 += _l240.d;                                                 \
 495       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
 496       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
 497       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
 498       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
 499       _o240.d += _const[7];                                             \
 500       _n240.d += _const[6];                                             \
 501       _m240.d += _const[5];                                             \
 502       _l240.d += _const[4];                                             \
 503       if (_s240.d != 0.0)                                               \
 504         _y240 = 1;                                                      \
 505       if (_r240.d != 0.0)                                               \
 506         _y240 = 1;                                                      \
 507       if (_q240.d != 0.0)                                               \
 508         _y240 = 1;                                                      \
 509       if (_p240.d != 0.0)                                               \
 510         _y240 = 1;                                                      \
 511       _t240 = (DItype) _k240;                                           \
 512       _u240 = _l240.i;                                                  \
 513       _v240 = _m240.i;                                                  \
 514       _w240 = _n240.i;                                                  \
 515       _x240 = _o240.i;                                                  \
 516       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
 517                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
 518       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
 519                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
 520                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
 521                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
 522                 | _y240);                                               \
 523       resetfe;                                                          \
 524     }                                                                   \
 525   while (0)
 526
 527 /* Division algorithms: */
 528
 529 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 530   do                                                                    \
 531     {                                                                   \
 532       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;                              \
 533       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;                              \
 534       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;                              \
 535       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;                              \
 536       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;                              \
 537       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;                              \
 538       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;                              \
 539       if (_FP_FRAC_GE_2 (X, Y))                                         \
 540         {                                                               \
 541           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;                       \
 542           _FP_DIV_MEAT_2_udiv_n_f1                                      \
 543             = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;            \
 544           _FP_DIV_MEAT_2_udiv_n_f0                                      \
 545             = X##_f0 << (_FP_W_TYPE_SIZE - 1);                          \
 546         }                                                               \
 547       else                                                              \
 548         {                                                               \
 549           R##_e--;                                                      \
 550           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;                            \
 551           _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;                            \
 552           _FP_DIV_MEAT_2_udiv_n_f0 = 0;                                 \
 553         }                                                               \
 554                                                                         \
 555       /* Normalize, i.e. make the most significant bit of the           \
 556          denominator set.  */                                           \
 557       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
 558                                                                         \
 559       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,                     \
 560                   _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,   \
 561                   Y##_f1);                                              \
 562       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,    \
 563                  R##_f1, Y##_f0);                                       \
 564       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;              \
 565       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
 566         {                                                               \
 567           R##_f1--;                                                     \
 568           _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                     \
 569                           _FP_DIV_MEAT_2_udiv_r);                       \
 570           if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                  \
 571               && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                  \
 572                                 _FP_DIV_MEAT_2_udiv_r))                 \
 573             {                                                           \
 574               R##_f1--;                                                 \
 575               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 576                               _FP_DIV_MEAT_2_udiv_r);                   \
 577             }                                                           \
 578         }                                                               \
 579       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);    \
 580                                                                         \
 581       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)                           \
 582         {                                                               \
 583           /* This is a special case, not an optimization                \
 584              (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).  \
 585              As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,          \
 586              R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we  \
 587              know what kind of bits it is (sticky, guard, round),       \
 588              we don't care.  We also don't care what the reminder is,   \
 589              because the guard bit will be set anyway.  -jj */          \
 590           R##_f0 = -1;                                                  \
 591         }                                                               \
 592       else                                                              \
 593         {                                                               \
 594           udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,                 \
 595                       _FP_DIV_MEAT_2_udiv_r_f1,                         \
 596                       _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);                \
 597           umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,                          \
 598                      _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);         \
 599           _FP_DIV_MEAT_2_udiv_r_f0 = 0;                                 \
 600           if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                     \
 601                              _FP_DIV_MEAT_2_udiv_r))                    \
 602             {                                                           \
 603               R##_f0--;                                                 \
 604               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
 605                               _FP_DIV_MEAT_2_udiv_r);                   \
 606               if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)              \
 607                   && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,              \
 608                                     _FP_DIV_MEAT_2_udiv_r))             \
 609                 {                                                       \
 610                   R##_f0--;                                             \
 611                   _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,             \
 612                                   _FP_DIV_MEAT_2_udiv_r);               \
 613                 }                                                       \
 614             }                                                           \
 615           if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,                    \
 616                               _FP_DIV_MEAT_2_udiv_m))                   \
 617             R##_f0 |= _FP_WORK_STICKY;                                  \
 618         }                                                               \
 619     }                                                                   \
 620   while (0)
 621
 622
 623 /* Square root algorithms:
 624    We have just one right now, maybe Newton approximation
 625    should be added for those machines where division is fast.  */
 626
 627 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
 628   do                                                            \
 629     {                                                           \
 630       while (q)                                                 \
 631         {                                                       \
 632           T##_f1 = S##_f1 + q;                                  \
 633           if (T##_f1 <= X##_f1)                                 \
 634             {                                                   \
 635               S##_f1 = T##_f1 + q;                              \
 636               X##_f1 -= T##_f1;                                 \
 637               R##_f1 += q;                                      \
 638             }                                                   \
 639           _FP_FRAC_SLL_2 (X, 1);                                \
 640           q >>= 1;                                              \
 641         }                                                       \
 642       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);              \
 643       while (q != _FP_WORK_ROUND)                               \
 644         {                                                       \
 645           T##_f0 = S##_f0 + q;                                  \
 646           T##_f1 = S##_f1;                                      \
 647           if (T##_f1 < X##_f1                                   \
 648               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
 649             {                                                   \
 650               S##_f0 = T##_f0 + q;                              \
 651               S##_f1 += (T##_f0 > S##_f0);                      \
 652               _FP_FRAC_DEC_2 (X, T);                            \
 653               R##_f0 += q;                                      \
 654             }                                                   \
 655           _FP_FRAC_SLL_2 (X, 1);                                \
 656           q >>= 1;                                              \
 657         }                                                       \
 658       if (X##_f0 | X##_f1)                                      \
 659         {                                                       \
 660           if (S##_f1 < X##_f1                                   \
 661               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
 662             R##_f0 |= _FP_WORK_ROUND;                           \
 663           R##_f0 |= _FP_WORK_STICKY;                            \
 664         }                                                       \
 665     }                                                           \
 666   while (0)
 667
 668
 669 /* Assembly/disassembly for converting to/from integral types.
 670    No shifting or overflow handled here.  */
 671
 672 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 673   (void) ((rsize <= _FP_W_TYPE_SIZE)            \
 674           ? ({ r = X##_f0; })                   \
 675           : ({                                  \
 676               r = X##_f1;                       \
 677               r <<= _FP_W_TYPE_SIZE;            \
 678               r += X##_f0;                      \
 679             }))
 680
 681 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 682   do                                                                    \
 683     {                                                                   \
 684       X##_f0 = r;                                                       \
 685       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
 686     }                                                                   \
 687   while (0)
 688
 689 /* Convert FP values between word sizes.  */
 690
 691 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 692
 693 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
 694
 695 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)