soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    The GNU C Library is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    Lesser General Public License for more details.
  19
  20    You should have received a copy of the GNU Lesser General Public
  21    License along with the GNU C Library; if not, write to the Free
  22    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  23    02111-1307 USA.  */
  24
  25 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  26 #define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
  27 #define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
  28 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  29 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  30 #define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
  31
  32 #define _FP_FRAC_SLL_2(X,N)                                                 \
  33 (void)(((N) < _FP_W_TYPE_SIZE)                                              \
  34        ? ({                                                                 \
  35             if (__builtin_constant_p(N) && (N) == 1)                        \
  36               {                                                             \
  37                 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
  38                 X##_f0 += X##_f0;                                           \
  39               }                                                             \
  40             else                                                            \
  41               {                                                             \
  42                 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  43                 X##_f0 <<= (N);                                             \
  44               }                                                             \
  45             0;                                                              \
  46           })                                                                \
  47        : ({                                                                 \
  48             X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
  49             X##_f0 = 0;                                                     \
  50           }))
  51
  52
  53 #define _FP_FRAC_SRL_2(X,N)                                             \
  54 (void)(((N) < _FP_W_TYPE_SIZE)                                          \
  55        ? ({                                                             \
  56             X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  57             X##_f1 >>= (N);                                             \
  58           })                                                            \
  59        : ({                                                             \
  60             X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                 \
  61             X##_f1 = 0;                                                 \
  62           }))
  63
  64 /* Right shift with sticky-lsb.  */
  65 #define _FP_FRAC_SRST_2(X,S, N,sz)                                        \
  66 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
  67        ? ({                                                               \
  68             S = (__builtin_constant_p(N) && (N) == 1                      \
  69                  ? X##_f0 & 1                                             \
  70                  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);             \
  71             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  72             X##_f1 >>= (N);                                               \
  73           })                                                              \
  74        : ({                                                               \
  75             S = ((((N) == _FP_W_TYPE_SIZE                                 \
  76                    ? 0                                                    \
  77                    : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))               \
  78                   | X##_f0) != 0);                                        \
  79             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));                 \
  80             X##_f1 = 0;                                                   \
  81           }))
  82
  83 #define _FP_FRAC_SRS_2(X,N,sz)                                            \
  84 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
  85        ? ({                                                               \
  86             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
  87                       (__builtin_constant_p(N) && (N) == 1                \
  88                        ? X##_f0 & 1                                       \
  89                        : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));      \
  90             X##_f1 >>= (N);                                               \
  91           })                                                              \
  92        : ({                                                               \
  93             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                 \
  94                       ((((N) == _FP_W_TYPE_SIZE                           \
  95                          ? 0                                              \
  96                          : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))         \
  97                         | X##_f0) != 0));                                 \
  98             X##_f1 = 0;                                                   \
  99           }))
 100
 101 #define _FP_FRAC_ADDI_2(X,I)    \
 102   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
 103
 104 #define _FP_FRAC_ADD_2(R,X,Y)   \
 105   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 106
 107 #define _FP_FRAC_SUB_2(R,X,Y)   \
 108   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 109
 110 #define _FP_FRAC_DEC_2(X,Y)     \
 111   __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
 112
 113 #define _FP_FRAC_CLZ_2(R,X)     \
 114   do {                          \
 115     if (X##_f1)                 \
 116       __FP_CLZ(R,X##_f1);       \
 117     else                        \
 118     {                           \
 119       __FP_CLZ(R,X##_f0);       \
 120       R += _FP_W_TYPE_SIZE;     \
 121     }                           \
 122   } while(0)
 123
 124 /* Predicates */
 125 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
 126 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 127 #define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 128 #define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
 129 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 130 #define _FP_FRAC_GT_2(X, Y)     \
 131   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 132 #define _FP_FRAC_GE_2(X, Y)     \
 133   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 134
 135 #define _FP_ZEROFRAC_2          0, 0
 136 #define _FP_MINFRAC_2           0, 1
 137 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
 138
 139 /*
 140  * Internals
 141  */
 142
 143 #define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
 144
 145 #define __FP_CLZ_2(R, xh, xl)   \
 146   do {                          \
 147     if (xh)                     \
 148       __FP_CLZ(R,xh);           \
 149     else                        \
 150     {                           \
 151       __FP_CLZ(R,xl);           \
 152       R += _FP_W_TYPE_SIZE;     \
 153     }                           \
 154   } while(0)
 155
 156 #if 0
 157
 158 #ifndef __FP_FRAC_ADDI_2
 159 #define __FP_FRAC_ADDI_2(xh, xl, i)     \
 160   (xh += ((xl += i) < i))
 161 #endif
 162 #ifndef __FP_FRAC_ADD_2
 163 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
 164   (rh = xh + yh + ((rl = xl + yl) < xl))
 165 #endif
 166 #ifndef __FP_FRAC_SUB_2
 167 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
 168   (rh = xh - yh - ((rl = xl - yl) > xl))
 169 #endif
 170 #ifndef __FP_FRAC_DEC_2
 171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
 172   do {                                  \
 173     UWtype _t = xl;                     \
 174     xh -= yh + ((xl -= yl) > _t);       \
 175   } while (0)
 176 #endif
 177
 178 #else
 179
 180 #undef __FP_FRAC_ADDI_2
 181 #define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
 182 #undef __FP_FRAC_ADD_2
 183 #define __FP_FRAC_ADD_2                 add_ssaaaa
 184 #undef __FP_FRAC_SUB_2
 185 #define __FP_FRAC_SUB_2                 sub_ddmmss
 186 #undef __FP_FRAC_DEC_2
 187 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
 188
 189 #endif
 190
 191 /*
 192  * Unpack the raw bits of a native fp value.  Do not classify or
 193  * normalize the data.
 194  */
 195
 196 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 197   do {                                                  \
 198     union _FP_UNION_##fs _flo; _flo.flt = (val);        \
 199                                                         \
 200     X##_f0 = _flo.bits.frac0;                           \
 201     X##_f1 = _flo.bits.frac1;                           \
 202     X##_e  = _flo.bits.exp;                             \
 203     X##_s  = _flo.bits.sign;                            \
 204   } while (0)
 205
 206 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 207   do {                                                  \
 208     union _FP_UNION_##fs *_flo =                        \
 209       (union _FP_UNION_##fs *)(val);                    \
 210                                                         \
 211     X##_f0 = _flo->bits.frac0;                          \
 212     X##_f1 = _flo->bits.frac1;                          \
 213     X##_e  = _flo->bits.exp;                            \
 214     X##_s  = _flo->bits.sign;                           \
 215   } while (0)
 216
 217
 218 /*
 219  * Repack the raw bits of a native fp value.
 220  */
 221
 222 #define _FP_PACK_RAW_2(fs, val, X)                      \
 223   do {                                                  \
 224     union _FP_UNION_##fs _flo;                          \
 225                                                         \
 226     _flo.bits.frac0 = X##_f0;                           \
 227     _flo.bits.frac1 = X##_f1;                           \
 228     _flo.bits.exp   = X##_e;                            \
 229     _flo.bits.sign  = X##_s;                            \
 230                                                         \
 231     (val) = _flo.flt;                                   \
 232   } while (0)
 233
 234 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 235   do {                                                  \
 236     union _FP_UNION_##fs *_flo =                        \
 237       (union _FP_UNION_##fs *)(val);                    \
 238                                                         \
 239     _flo->bits.frac0 = X##_f0;                          \
 240     _flo->bits.frac1 = X##_f1;                          \
 241     _flo->bits.exp   = X##_e;                           \
 242     _flo->bits.sign  = X##_s;                           \
 243   } while (0)
 244
 245
 246 /*
 247  * Multiplication algorithms:
 248  */
 249
 250 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 251
 252 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 253   do {                                                                  \
 254     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 255                                                                         \
 256     doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
 257     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
 258     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
 259     doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
 260                                                                         \
 261     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 262                     _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,             \
 263                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 264                     _FP_FRAC_WORD_4(_z,1));                             \
 265     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 266                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,             \
 267                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 268                     _FP_FRAC_WORD_4(_z,1));                             \
 269                                                                         \
 270     /* Normalize since we know where the msb of the multiplicands       \
 271        were (bit B), we know that the msb of the of the product is      \
 272        at either 2B or 2B-1.  */                                        \
 273     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 274     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 275     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 276   } while (0)
 277
 278 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 279    Do only 3 multiplications instead of four. This one is for machines
 280    where multiplication is much more expensive than subtraction.  */
 281
 282 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 283   do {                                                                  \
 284     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 285     _FP_W_TYPE _d;                                                      \
 286     int _c1, _c2;                                                       \
 287                                                                         \
 288     _b_f0 = X##_f0 + X##_f1;                                            \
 289     _c1 = _b_f0 < X##_f0;                                               \
 290     _b_f1 = Y##_f0 + Y##_f1;                                            \
 291     _c2 = _b_f1 < Y##_f0;                                               \
 292     doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                    \
 293     doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
 294     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
 295                                                                         \
 296     _b_f0 &= -_c2;                                                      \
 297     _b_f1 &= -_c1;                                                      \
 298     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 299                     _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,          \
 300                     0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));   \
 301     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 302                      _b_f0);                                            \
 303     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 304                      _b_f1);                                            \
 305     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 306                     _FP_FRAC_WORD_4(_z,1),                              \
 307                     0, _d, _FP_FRAC_WORD_4(_z,0));                      \
 308     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 309                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);            \
 310     __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),       \
 311                     _c_f1, _c_f0,                                       \
 312                     _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
 313                                                                         \
 314     /* Normalize since we know where the msb of the multiplicands       \
 315        were (bit B), we know that the msb of the of the product is      \
 316        at either 2B or 2B-1.  */                                        \
 317     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 318     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 319     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 320   } while (0)
 321
 322 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 323   do {                                                                  \
 324     _FP_FRAC_DECL_4(_z);                                                \
 325     _FP_W_TYPE _x[2], _y[2];                                            \
 326     _x[0] = X##_f0; _x[1] = X##_f1;                                     \
 327     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 328                                                                         \
 329     mpn_mul_n(_z_f, _x, _y, 2);                                         \
 330                                                                         \
 331     /* Normalize since we know where the msb of the multiplicands       \
 332        were (bit B), we know that the msb of the of the product is      \
 333        at either 2B or 2B-1.  */                                        \
 334     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 335     R##_f0 = _z_f[0];                                                   \
 336     R##_f1 = _z_f[1];                                                   \
 337   } while (0)
 338
 339 /* Do at most 120x120=240 bits multiplication using double floating
 340    point multiplication.  This is useful if floating point
 341    multiplication has much bigger throughput than integer multiply.
 342    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 343    between 106 and 120 only.
 344    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 345    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 346    towards zero,  RESETFE should optionally reset it back.  */
 347
 348 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
 349   do {                                                                          \
 350     static const double _const[] = {                                            \
 351       /* 2^-24 */ 5.9604644775390625e-08,                                       \
 352       /* 2^-48 */ 3.5527136788005009e-15,                                       \
 353       /* 2^-72 */ 2.1175823681357508e-22,                                       \
 354       /* 2^-96 */ 1.2621774483536189e-29,                                       \
 355       /* 2^28 */ 2.68435456e+08,                                                \
 356       /* 2^4 */ 1.600000e+01,                                                   \
 357       /* 2^-20 */ 9.5367431640625e-07,                                          \
 358       /* 2^-44 */ 5.6843418860808015e-14,                                       \
 359       /* 2^-68 */ 3.3881317890172014e-21,                                       \
 360       /* 2^-92 */ 2.0194839173657902e-28,                                       \
 361       /* 2^-116 */ 1.2037062152420224e-35};                                     \
 362     double _a240, _b240, _c240, _d240, _e240, _f240,                            \
 363            _g240, _h240, _i240, _j240, _k240;                                   \
 364     union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
 365                                    _p240, _q240, _r240, _s240;                  \
 366     UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                       \
 367                                                                                 \
 368     if (wfracbits < 106 || wfracbits > 120)                                     \
 369       abort();                                                                  \
 370                                                                                 \
 371     setfetz;                                                                    \
 372                                                                                 \
 373     _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
 374     _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
 375     _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
 376     _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
 377     _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
 378     _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
 379     _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
 380     _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
 381     _a240 = (double)(long)(X##_f1 >> 32);                                       \
 382     _f240 = (double)(long)(Y##_f1 >> 32);                                       \
 383     _e240 *= _const[3];                                                         \
 384     _j240 *= _const[3];                                                         \
 385     _d240 *= _const[2];                                                         \
 386     _i240 *= _const[2];                                                         \
 387     _c240 *= _const[1];                                                         \
 388     _h240 *= _const[1];                                                         \
 389     _b240 *= _const[0];                                                         \
 390     _g240 *= _const[0];                                                         \
 391     _s240.d =                                                         _e240*_j240;\
 392     _r240.d =                                           _d240*_j240 + _e240*_i240;\
 393     _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
 394     _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
 395     _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
 396     _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
 397     _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
 398     _l240.d = _a240*_g240 + _b240*_f240;                                        \
 399     _k240 =   _a240*_f240;                                                      \
 400     _r240.d += _s240.d;                                                         \
 401     _q240.d += _r240.d;                                                         \
 402     _p240.d += _q240.d;                                                         \
 403     _o240.d += _p240.d;                                                         \
 404     _n240.d += _o240.d;                                                         \
 405     _m240.d += _n240.d;                                                         \
 406     _l240.d += _m240.d;                                                         \
 407     _k240 += _l240.d;                                                           \
 408     _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
 409     _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
 410     _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
 411     _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
 412     _o240.d += _const[7];                                                       \
 413     _n240.d += _const[6];                                                       \
 414     _m240.d += _const[5];                                                       \
 415     _l240.d += _const[4];                                                       \
 416     if (_s240.d != 0.0) _y240 = 1;                                              \
 417     if (_r240.d != 0.0) _y240 = 1;                                              \
 418     if (_q240.d != 0.0) _y240 = 1;                                              \
 419     if (_p240.d != 0.0) _y240 = 1;                                              \
 420     _t240 = (DItype)_k240;                                                      \
 421     _u240 = _l240.i;                                                            \
 422     _v240 = _m240.i;                                                            \
 423     _w240 = _n240.i;                                                            \
 424     _x240 = _o240.i;                                                            \
 425     R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
 426              | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
 427     R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
 428              | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
 429              | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
 430              | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
 431              | _y240;                                                           \
 432     resetfe;                                                                    \
 433   } while (0)
 434
 435 /*
 436  * Division algorithms:
 437  */
 438
 439 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 440   do {                                                                  \
 441     _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
 442     if (_FP_FRAC_GT_2(X, Y))                                            \
 443       {                                                                 \
 444         _n_f2 = X##_f1 >> 1;                                            \
 445         _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
 446         _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
 447       }                                                                 \
 448     else                                                                \
 449       {                                                                 \
 450         R##_e--;                                                        \
 451         _n_f2 = X##_f1;                                                 \
 452         _n_f1 = X##_f0;                                                 \
 453         _n_f0 = 0;                                                      \
 454       }                                                                 \
 455                                                                         \
 456     /* Normalize, i.e. make the most significant bit of the             \
 457        denominator set. */                                              \
 458     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
 459                                                                         \
 460     udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
 461     umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
 462     _r_f0 = _n_f0;                                                      \
 463     if (_FP_FRAC_GT_2(_m, _r))                                          \
 464       {                                                                 \
 465         R##_f1--;                                                       \
 466         _FP_FRAC_ADD_2(_r, Y, _r);                                      \
 467         if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
 468           {                                                             \
 469             R##_f1--;                                                   \
 470             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 471           }                                                             \
 472       }                                                                 \
 473     _FP_FRAC_DEC_2(_r, _m);                                             \
 474                                                                         \
 475     if (_r_f1 == Y##_f1)                                                \
 476       {                                                                 \
 477         /* This is a special case, not an optimization                  \
 478            (_r/Y##_f1 would not fit into UWtype).                       \
 479            As _r is guaranteed to be < Y,  R##_f0 can be either         \
 480            (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
 481            of bits it is (sticky, guard, round),  we don't care.        \
 482            We also don't care what the reminder is,  because the        \
 483            guard bit will be set anyway.  -jj */                        \
 484         R##_f0 = -1;                                                    \
 485       }                                                                 \
 486     else                                                                \
 487       {                                                                 \
 488         udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
 489         umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
 490         _r_f0 = 0;                                                      \
 491         if (_FP_FRAC_GT_2(_m, _r))                                      \
 492           {                                                             \
 493             R##_f0--;                                                   \
 494             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 495             if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
 496               {                                                         \
 497                 R##_f0--;                                               \
 498                 _FP_FRAC_ADD_2(_r, Y, _r);                              \
 499               }                                                         \
 500           }                                                             \
 501         if (!_FP_FRAC_EQ_2(_r, _m))                                     \
 502           R##_f0 |= _FP_WORK_STICKY;                                    \
 503       }                                                                 \
 504   } while (0)
 505
 506
 507 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
 508   do {                                                                  \
 509     _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
 510     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 511     _x[0] = _x[3] = 0;                                                  \
 512     if (_FP_FRAC_GT_2(X, Y))                                            \
 513       {                                                                 \
 514         R##_e++;                                                        \
 515         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
 516                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 517                             (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
 518         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
 519       }                                                                 \
 520     else                                                                \
 521       {                                                                 \
 522         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
 523                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 524                             (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
 525         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
 526       }                                                                 \
 527                                                                         \
 528     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                            \
 529     R##_f1 = _z[1];                                                     \
 530     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
 531   } while (0)
 532
 533
 534 /*
 535  * Square root algorithms:
 536  * We have just one right now, maybe Newton approximation
 537  * should be added for those machines where division is fast.
 538  */
 539
 540 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
 541   do {                                                  \
 542     while (q)                                           \
 543       {                                                 \
 544         T##_f1 = S##_f1 + q;                            \
 545         if (T##_f1 <= X##_f1)                           \
 546           {                                             \
 547             S##_f1 = T##_f1 + q;                        \
 548             X##_f1 -= T##_f1;                           \
 549             R##_f1 += q;                                \
 550           }                                             \
 551         _FP_FRAC_SLL_2(X, 1);                           \
 552         q >>= 1;                                        \
 553       }                                                 \
 554     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
 555     while (q != _FP_WORK_ROUND)                         \
 556       {                                                 \
 557         T##_f0 = S##_f0 + q;                            \
 558         T##_f1 = S##_f1;                                \
 559         if (T##_f1 < X##_f1 ||                          \
 560             (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
 561           {                                             \
 562             S##_f0 = T##_f0 + q;                        \
 563             S##_f1 += (T##_f0 > S##_f0);                \
 564             _FP_FRAC_DEC_2(X, T);                       \
 565             R##_f0 += q;                                \
 566           }                                             \
 567         _FP_FRAC_SLL_2(X, 1);                           \
 568         q >>= 1;                                        \
 569       }                                                 \
 570     if (X##_f0 | X##_f1)                                \
 571       {                                                 \
 572         if (S##_f1 < X##_f1 ||                          \
 573             (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
 574           R##_f0 |= _FP_WORK_ROUND;                     \
 575         R##_f0 |= _FP_WORK_STICKY;                      \
 576       }                                                 \
 577   } while (0)
 578
 579
 580 /*
 581  * Assembly/disassembly for converting to/from integral types.
 582  * No shifting or overflow handled here.
 583  */
 584
 585 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 586 (void)((rsize <= _FP_W_TYPE_SIZE)               \
 587        ? ({ r = X##_f0; })                      \
 588        : ({                                     \
 589             r = X##_f1;                         \
 590             r <<= _FP_W_TYPE_SIZE;              \
 591             r += X##_f0;                        \
 592           }))
 593
 594 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 595   do {                                                                  \
 596     X##_f0 = r;                                                         \
 597     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
 598   } while (0)
 599
 600 /*
 601  * Convert FP values between word sizes
 602  */
 603
 604 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 605
 606 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))