soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  34 #define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
  35 #define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
  36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  38 #define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
  39
  40 #define _FP_FRAC_SLL_2(X,N)                                                 \
  41 (void)(((N) < _FP_W_TYPE_SIZE)                                              \
  42        ? ({                                                                 \
  43             if (__builtin_constant_p(N) && (N) == 1)                        \
  44               {                                                             \
  45                 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
  46                 X##_f0 += X##_f0;                                           \
  47               }                                                             \
  48             else                                                            \
  49               {                                                             \
  50                 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  51                 X##_f0 <<= (N);                                             \
  52               }                                                             \
  53             0;                                                              \
  54           })                                                                \
  55        : ({                                                                 \
  56             X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
  57             X##_f0 = 0;                                                     \
  58           }))
  59
  60
  61 #define _FP_FRAC_SRL_2(X,N)                                             \
  62 (void)(((N) < _FP_W_TYPE_SIZE)                                          \
  63        ? ({                                                             \
  64             X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  65             X##_f1 >>= (N);                                             \
  66           })                                                            \
  67        : ({                                                             \
  68             X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                 \
  69             X##_f1 = 0;                                                 \
  70           }))
  71
  72 /* Right shift with sticky-lsb.  */
  73 #define _FP_FRAC_SRST_2(X,S, N,sz)                                        \
  74 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
  75        ? ({                                                               \
  76             S = (__builtin_constant_p(N) && (N) == 1                      \
  77                  ? X##_f0 & 1                                             \
  78                  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);             \
  79             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  80             X##_f1 >>= (N);                                               \
  81           })                                                              \
  82        : ({                                                               \
  83             S = ((((N) == _FP_W_TYPE_SIZE                                 \
  84                    ? 0                                                    \
  85                    : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))               \
  86                   | X##_f0) != 0);                                        \
  87             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));                 \
  88             X##_f1 = 0;                                                   \
  89           }))
  90
  91 #define _FP_FRAC_SRS_2(X,N,sz)                                            \
  92 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
  93        ? ({                                                               \
  94             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
  95                       (__builtin_constant_p(N) && (N) == 1                \
  96                        ? X##_f0 & 1                                       \
  97                        : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));      \
  98             X##_f1 >>= (N);                                               \
  99           })                                                              \
 100        : ({                                                               \
 101             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                 \
 102                       ((((N) == _FP_W_TYPE_SIZE                           \
 103                          ? 0                                              \
 104                          : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))         \
 105                         | X##_f0) != 0));                                 \
 106             X##_f1 = 0;                                                   \
 107           }))
 108
 109 #define _FP_FRAC_ADDI_2(X,I)    \
 110   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
 111
 112 #define _FP_FRAC_ADD_2(R,X,Y)   \
 113   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 114
 115 #define _FP_FRAC_SUB_2(R,X,Y)   \
 116   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 117
 118 #define _FP_FRAC_DEC_2(X,Y)     \
 119   __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
 120
 121 #define _FP_FRAC_CLZ_2(R,X)     \
 122   do {                          \
 123     if (X##_f1)                 \
 124       __FP_CLZ(R,X##_f1);       \
 125     else                        \
 126     {                           \
 127       __FP_CLZ(R,X##_f0);       \
 128       R += _FP_W_TYPE_SIZE;     \
 129     }                           \
 130   } while(0)
 131
 132 /* Predicates */
 133 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
 134 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 135 #define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 136 #define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
 137 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 138 #define _FP_FRAC_GT_2(X, Y)     \
 139   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 140 #define _FP_FRAC_GE_2(X, Y)     \
 141   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 142
 143 #define _FP_ZEROFRAC_2          0, 0
 144 #define _FP_MINFRAC_2           0, 1
 145 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
 146
 147 /*
 148  * Internals
 149  */
 150
 151 #define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
 152
 153 #define __FP_CLZ_2(R, xh, xl)   \
 154   do {                          \
 155     if (xh)                     \
 156       __FP_CLZ(R,xh);           \
 157     else                        \
 158     {                           \
 159       __FP_CLZ(R,xl);           \
 160       R += _FP_W_TYPE_SIZE;     \
 161     }                           \
 162   } while(0)
 163
 164 #if 0
 165
 166 #ifndef __FP_FRAC_ADDI_2
 167 #define __FP_FRAC_ADDI_2(xh, xl, i)     \
 168   (xh += ((xl += i) < i))
 169 #endif
 170 #ifndef __FP_FRAC_ADD_2
 171 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
 172   (rh = xh + yh + ((rl = xl + yl) < xl))
 173 #endif
 174 #ifndef __FP_FRAC_SUB_2
 175 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
 176   (rh = xh - yh - ((rl = xl - yl) > xl))
 177 #endif
 178 #ifndef __FP_FRAC_DEC_2
 179 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
 180   do {                                  \
 181     UWtype _t = xl;                     \
 182     xh -= yh + ((xl -= yl) > _t);       \
 183   } while (0)
 184 #endif
 185
 186 #else
 187
 188 #undef __FP_FRAC_ADDI_2
 189 #define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
 190 #undef __FP_FRAC_ADD_2
 191 #define __FP_FRAC_ADD_2                 add_ssaaaa
 192 #undef __FP_FRAC_SUB_2
 193 #define __FP_FRAC_SUB_2                 sub_ddmmss
 194 #undef __FP_FRAC_DEC_2
 195 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
 196
 197 #endif
 198
 199 /*
 200  * Unpack the raw bits of a native fp value.  Do not classify or
 201  * normalize the data.
 202  */
 203
 204 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 205   do {                                                  \
 206     union _FP_UNION_##fs _flo; _flo.flt = (val);        \
 207                                                         \
 208     X##_f0 = _flo.bits.frac0;                           \
 209     X##_f1 = _flo.bits.frac1;                           \
 210     X##_e  = _flo.bits.exp;                             \
 211     X##_s  = _flo.bits.sign;                            \
 212   } while (0)
 213
 214 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 215   do {                                                  \
 216     union _FP_UNION_##fs *_flo =                        \
 217       (union _FP_UNION_##fs *)(val);                    \
 218                                                         \
 219     X##_f0 = _flo->bits.frac0;                          \
 220     X##_f1 = _flo->bits.frac1;                          \
 221     X##_e  = _flo->bits.exp;                            \
 222     X##_s  = _flo->bits.sign;                           \
 223   } while (0)
 224
 225
 226 /*
 227  * Repack the raw bits of a native fp value.
 228  */
 229
 230 #define _FP_PACK_RAW_2(fs, val, X)                      \
 231   do {                                                  \
 232     union _FP_UNION_##fs _flo;                          \
 233                                                         \
 234     _flo.bits.frac0 = X##_f0;                           \
 235     _flo.bits.frac1 = X##_f1;                           \
 236     _flo.bits.exp   = X##_e;                            \
 237     _flo.bits.sign  = X##_s;                            \
 238                                                         \
 239     (val) = _flo.flt;                                   \
 240   } while (0)
 241
 242 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 243   do {                                                  \
 244     union _FP_UNION_##fs *_flo =                        \
 245       (union _FP_UNION_##fs *)(val);                    \
 246                                                         \
 247     _flo->bits.frac0 = X##_f0;                          \
 248     _flo->bits.frac1 = X##_f1;                          \
 249     _flo->bits.exp   = X##_e;                           \
 250     _flo->bits.sign  = X##_s;                           \
 251   } while (0)
 252
 253
 254 /*
 255  * Multiplication algorithms:
 256  */
 257
 258 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 259
 260 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 261   do {                                                                  \
 262     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 263                                                                         \
 264     doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
 265     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
 266     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
 267     doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
 268                                                                         \
 269     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 270                     _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,             \
 271                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 272                     _FP_FRAC_WORD_4(_z,1));                             \
 273     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 274                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,             \
 275                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 276                     _FP_FRAC_WORD_4(_z,1));                             \
 277                                                                         \
 278     /* Normalize since we know where the msb of the multiplicands       \
 279        were (bit B), we know that the msb of the of the product is      \
 280        at either 2B or 2B-1.  */                                        \
 281     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 282     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 283     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 284   } while (0)
 285
 286 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 287    Do only 3 multiplications instead of four. This one is for machines
 288    where multiplication is much more expensive than subtraction.  */
 289
 290 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 291   do {                                                                  \
 292     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 293     _FP_W_TYPE _d;                                                      \
 294     int _c1, _c2;                                                       \
 295                                                                         \
 296     _b_f0 = X##_f0 + X##_f1;                                            \
 297     _c1 = _b_f0 < X##_f0;                                               \
 298     _b_f1 = Y##_f0 + Y##_f1;                                            \
 299     _c2 = _b_f1 < Y##_f0;                                               \
 300     doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                    \
 301     doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
 302     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
 303                                                                         \
 304     _b_f0 &= -_c2;                                                      \
 305     _b_f1 &= -_c1;                                                      \
 306     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 307                     _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,          \
 308                     0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));   \
 309     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 310                      _b_f0);                                            \
 311     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 312                      _b_f1);                                            \
 313     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 314                     _FP_FRAC_WORD_4(_z,1),                              \
 315                     0, _d, _FP_FRAC_WORD_4(_z,0));                      \
 316     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 317                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);            \
 318     __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),       \
 319                     _c_f1, _c_f0,                                       \
 320                     _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
 321                                                                         \
 322     /* Normalize since we know where the msb of the multiplicands       \
 323        were (bit B), we know that the msb of the of the product is      \
 324        at either 2B or 2B-1.  */                                        \
 325     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 326     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 327     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 328   } while (0)
 329
 330 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 331   do {                                                                  \
 332     _FP_FRAC_DECL_4(_z);                                                \
 333     _FP_W_TYPE _x[2], _y[2];                                            \
 334     _x[0] = X##_f0; _x[1] = X##_f1;                                     \
 335     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 336                                                                         \
 337     mpn_mul_n(_z_f, _x, _y, 2);                                         \
 338                                                                         \
 339     /* Normalize since we know where the msb of the multiplicands       \
 340        were (bit B), we know that the msb of the of the product is      \
 341        at either 2B or 2B-1.  */                                        \
 342     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 343     R##_f0 = _z_f[0];                                                   \
 344     R##_f1 = _z_f[1];                                                   \
 345   } while (0)
 346
 347 /* Do at most 120x120=240 bits multiplication using double floating
 348    point multiplication.  This is useful if floating point
 349    multiplication has much bigger throughput than integer multiply.
 350    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 351    between 106 and 120 only.
 352    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 353    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 354    towards zero,  RESETFE should optionally reset it back.  */
 355
 356 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
 357   do {                                                                          \
 358     static const double _const[] = {                                            \
 359       /* 2^-24 */ 5.9604644775390625e-08,                                       \
 360       /* 2^-48 */ 3.5527136788005009e-15,                                       \
 361       /* 2^-72 */ 2.1175823681357508e-22,                                       \
 362       /* 2^-96 */ 1.2621774483536189e-29,                                       \
 363       /* 2^28 */ 2.68435456e+08,                                                \
 364       /* 2^4 */ 1.600000e+01,                                                   \
 365       /* 2^-20 */ 9.5367431640625e-07,                                          \
 366       /* 2^-44 */ 5.6843418860808015e-14,                                       \
 367       /* 2^-68 */ 3.3881317890172014e-21,                                       \
 368       /* 2^-92 */ 2.0194839173657902e-28,                                       \
 369       /* 2^-116 */ 1.2037062152420224e-35};                                     \
 370     double _a240, _b240, _c240, _d240, _e240, _f240,                            \
 371            _g240, _h240, _i240, _j240, _k240;                                   \
 372     union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
 373                                    _p240, _q240, _r240, _s240;                  \
 374     UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                       \
 375                                                                                 \
 376     if (wfracbits < 106 || wfracbits > 120)                                     \
 377       abort();                                                                  \
 378                                                                                 \
 379     setfetz;                                                                    \
 380                                                                                 \
 381     _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
 382     _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
 383     _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
 384     _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
 385     _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
 386     _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
 387     _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
 388     _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
 389     _a240 = (double)(long)(X##_f1 >> 32);                                       \
 390     _f240 = (double)(long)(Y##_f1 >> 32);                                       \
 391     _e240 *= _const[3];                                                         \
 392     _j240 *= _const[3];                                                         \
 393     _d240 *= _const[2];                                                         \
 394     _i240 *= _const[2];                                                         \
 395     _c240 *= _const[1];                                                         \
 396     _h240 *= _const[1];                                                         \
 397     _b240 *= _const[0];                                                         \
 398     _g240 *= _const[0];                                                         \
 399     _s240.d =                                                         _e240*_j240;\
 400     _r240.d =                                           _d240*_j240 + _e240*_i240;\
 401     _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
 402     _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
 403     _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
 404     _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
 405     _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
 406     _l240.d = _a240*_g240 + _b240*_f240;                                        \
 407     _k240 =   _a240*_f240;                                                      \
 408     _r240.d += _s240.d;                                                         \
 409     _q240.d += _r240.d;                                                         \
 410     _p240.d += _q240.d;                                                         \
 411     _o240.d += _p240.d;                                                         \
 412     _n240.d += _o240.d;                                                         \
 413     _m240.d += _n240.d;                                                         \
 414     _l240.d += _m240.d;                                                         \
 415     _k240 += _l240.d;                                                           \
 416     _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
 417     _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
 418     _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
 419     _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
 420     _o240.d += _const[7];                                                       \
 421     _n240.d += _const[6];                                                       \
 422     _m240.d += _const[5];                                                       \
 423     _l240.d += _const[4];                                                       \
 424     if (_s240.d != 0.0) _y240 = 1;                                              \
 425     if (_r240.d != 0.0) _y240 = 1;                                              \
 426     if (_q240.d != 0.0) _y240 = 1;                                              \
 427     if (_p240.d != 0.0) _y240 = 1;                                              \
 428     _t240 = (DItype)_k240;                                                      \
 429     _u240 = _l240.i;                                                            \
 430     _v240 = _m240.i;                                                            \
 431     _w240 = _n240.i;                                                            \
 432     _x240 = _o240.i;                                                            \
 433     R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
 434              | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
 435     R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
 436              | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
 437              | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
 438              | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
 439              | _y240;                                                           \
 440     resetfe;                                                                    \
 441   } while (0)
 442
 443 /*
 444  * Division algorithms:
 445  */
 446
 447 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 448   do {                                                                  \
 449     _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
 450     if (_FP_FRAC_GT_2(X, Y))                                            \
 451       {                                                                 \
 452         _n_f2 = X##_f1 >> 1;                                            \
 453         _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
 454         _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
 455       }                                                                 \
 456     else                                                                \
 457       {                                                                 \
 458         R##_e--;                                                        \
 459         _n_f2 = X##_f1;                                                 \
 460         _n_f1 = X##_f0;                                                 \
 461         _n_f0 = 0;                                                      \
 462       }                                                                 \
 463                                                                         \
 464     /* Normalize, i.e. make the most significant bit of the             \
 465        denominator set. */                                              \
 466     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
 467                                                                         \
 468     udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
 469     umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
 470     _r_f0 = _n_f0;                                                      \
 471     if (_FP_FRAC_GT_2(_m, _r))                                          \
 472       {                                                                 \
 473         R##_f1--;                                                       \
 474         _FP_FRAC_ADD_2(_r, Y, _r);                                      \
 475         if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
 476           {                                                             \
 477             R##_f1--;                                                   \
 478             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 479           }                                                             \
 480       }                                                                 \
 481     _FP_FRAC_DEC_2(_r, _m);                                             \
 482                                                                         \
 483     if (_r_f1 == Y##_f1)                                                \
 484       {                                                                 \
 485         /* This is a special case, not an optimization                  \
 486            (_r/Y##_f1 would not fit into UWtype).                       \
 487            As _r is guaranteed to be < Y,  R##_f0 can be either         \
 488            (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
 489            of bits it is (sticky, guard, round),  we don't care.        \
 490            We also don't care what the reminder is,  because the        \
 491            guard bit will be set anyway.  -jj */                        \
 492         R##_f0 = -1;                                                    \
 493       }                                                                 \
 494     else                                                                \
 495       {                                                                 \
 496         udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
 497         umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
 498         _r_f0 = 0;                                                      \
 499         if (_FP_FRAC_GT_2(_m, _r))                                      \
 500           {                                                             \
 501             R##_f0--;                                                   \
 502             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 503             if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
 504               {                                                         \
 505                 R##_f0--;                                               \
 506                 _FP_FRAC_ADD_2(_r, Y, _r);                              \
 507               }                                                         \
 508           }                                                             \
 509         if (!_FP_FRAC_EQ_2(_r, _m))                                     \
 510           R##_f0 |= _FP_WORK_STICKY;                                    \
 511       }                                                                 \
 512   } while (0)
 513
 514
 515 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
 516   do {                                                                  \
 517     _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
 518     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 519     _x[0] = _x[3] = 0;                                                  \
 520     if (_FP_FRAC_GT_2(X, Y))                                            \
 521       {                                                                 \
 522         R##_e++;                                                        \
 523         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
 524                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 525                             (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
 526         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
 527       }                                                                 \
 528     else                                                                \
 529       {                                                                 \
 530         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
 531                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 532                             (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
 533         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
 534       }                                                                 \
 535                                                                         \
 536     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                            \
 537     R##_f1 = _z[1];                                                     \
 538     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
 539   } while (0)
 540
 541
 542 /*
 543  * Square root algorithms:
 544  * We have just one right now, maybe Newton approximation
 545  * should be added for those machines where division is fast.
 546  */
 547
 548 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
 549   do {                                                  \
 550     while (q)                                           \
 551       {                                                 \
 552         T##_f1 = S##_f1 + q;                            \
 553         if (T##_f1 <= X##_f1)                           \
 554           {                                             \
 555             S##_f1 = T##_f1 + q;                        \
 556             X##_f1 -= T##_f1;                           \
 557             R##_f1 += q;                                \
 558           }                                             \
 559         _FP_FRAC_SLL_2(X, 1);                           \
 560         q >>= 1;                                        \
 561       }                                                 \
 562     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
 563     while (q != _FP_WORK_ROUND)                         \
 564       {                                                 \
 565         T##_f0 = S##_f0 + q;                            \
 566         T##_f1 = S##_f1;                                \
 567         if (T##_f1 < X##_f1 ||                          \
 568             (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
 569           {                                             \
 570             S##_f0 = T##_f0 + q;                        \
 571             S##_f1 += (T##_f0 > S##_f0);                \
 572             _FP_FRAC_DEC_2(X, T);                       \
 573             R##_f0 += q;                                \
 574           }                                             \
 575         _FP_FRAC_SLL_2(X, 1);                           \
 576         q >>= 1;                                        \
 577       }                                                 \
 578     if (X##_f0 | X##_f1)                                \
 579       {                                                 \
 580         if (S##_f1 < X##_f1 ||                          \
 581             (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
 582           R##_f0 |= _FP_WORK_ROUND;                     \
 583         R##_f0 |= _FP_WORK_STICKY;                      \
 584       }                                                 \
 585   } while (0)
 586
 587
 588 /*
 589  * Assembly/disassembly for converting to/from integral types.
 590  * No shifting or overflow handled here.
 591  */
 592
 593 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 594 (void)((rsize <= _FP_W_TYPE_SIZE)               \
 595        ? ({ r = X##_f0; })                      \
 596        : ({                                     \
 597             r = X##_f1;                         \
 598             r <<= _FP_W_TYPE_SIZE;              \
 599             r += X##_f0;                        \
 600           }))
 601
 602 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 603   do {                                                                  \
 604     X##_f0 = r;                                                         \
 605     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
 606   } while (0)
 607
 608 /*
 609  * Convert FP values between word sizes
 610  */
 611
 612 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 613
 614 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
 615
 616 #define _FP_FRAC_COPY_2_2(D,S)          _FP_FRAC_COPY_2(D,S)