soft-fp/op-4.h

   1 /* Software floating-point emulation.
   2    Basic four-word fraction declaration and manipulation.
   3    Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, write to the Free
  31    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  32    MA 02110-1301, USA.  */
  33
  34 #define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
  35 #define _FP_FRAC_COPY_4(D,S)                    \
  36   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
  37    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
  38 #define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
  39 #define _FP_FRAC_HIGH_4(X)      (X##_f[3])
  40 #define _FP_FRAC_LOW_4(X)       (X##_f[0])
  41 #define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
  42
  43 #define _FP_FRAC_SLL_4(X,N)                                             \
  44   do {                                                                  \
  45     _FP_I_TYPE _up, _down, _skip, _i;                                   \
  46     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
  47     _up = (N) % _FP_W_TYPE_SIZE;                                        \
  48     _down = _FP_W_TYPE_SIZE - _up;                                      \
  49     if (!_up)                                                           \
  50       for (_i = 3; _i >= _skip; --_i)                                   \
  51         X##_f[_i] = X##_f[_i-_skip];                                    \
  52     else                                                                \
  53       {                                                                 \
  54         for (_i = 3; _i > _skip; --_i)                                  \
  55           X##_f[_i] = X##_f[_i-_skip] << _up                            \
  56                       | X##_f[_i-_skip-1] >> _down;                     \
  57         X##_f[_i--] = X##_f[0] << _up;                                  \
  58       }                                                                 \
  59     for (; _i >= 0; --_i)                                               \
  60       X##_f[_i] = 0;                                                    \
  61   } while (0)
  62
  63 /* This one was broken too */
  64 #define _FP_FRAC_SRL_4(X,N)                                             \
  65   do {                                                                  \
  66     _FP_I_TYPE _up, _down, _skip, _i;                                   \
  67     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
  68     _down = (N) % _FP_W_TYPE_SIZE;                                      \
  69     _up = _FP_W_TYPE_SIZE - _down;                                      \
  70     if (!_down)                                                         \
  71       for (_i = 0; _i <= 3-_skip; ++_i)                                 \
  72         X##_f[_i] = X##_f[_i+_skip];                                    \
  73     else                                                                \
  74       {                                                                 \
  75         for (_i = 0; _i < 3-_skip; ++_i)                                \
  76           X##_f[_i] = X##_f[_i+_skip] >> _down                          \
  77                       | X##_f[_i+_skip+1] << _up;                       \
  78         X##_f[_i++] = X##_f[3] >> _down;                                \
  79       }                                                                 \
  80     for (; _i < 4; ++_i)                                                \
  81       X##_f[_i] = 0;                                                    \
  82   } while (0)
  83
  84
  85 /* Right shift with sticky-lsb.
  86  * What this actually means is that we do a standard right-shift,
  87  * but that if any of the bits that fall off the right hand side
  88  * were one then we always set the LSbit.
  89  */
  90 #define _FP_FRAC_SRST_4(X,S,N,size)                     \
  91   do {                                                  \
  92     _FP_I_TYPE _up, _down, _skip, _i;                   \
  93     _FP_W_TYPE _s;                                      \
  94     _skip = (N) / _FP_W_TYPE_SIZE;                      \
  95     _down = (N) % _FP_W_TYPE_SIZE;                      \
  96     _up = _FP_W_TYPE_SIZE - _down;                      \
  97     for (_s = _i = 0; _i < _skip; ++_i)                 \
  98       _s |= X##_f[_i];                                  \
  99     if (!_down)                                         \
 100       for (_i = 0; _i <= 3-_skip; ++_i)                 \
 101         X##_f[_i] = X##_f[_i+_skip];                    \
 102     else                                                \
 103       {                                                 \
 104         _s |= X##_f[_i] << _up;                         \
 105         for (_i = 0; _i < 3-_skip; ++_i)                \
 106           X##_f[_i] = X##_f[_i+_skip] >> _down          \
 107                       | X##_f[_i+_skip+1] << _up;       \
 108         X##_f[_i++] = X##_f[3] >> _down;                \
 109       }                                                 \
 110     for (; _i < 4; ++_i)                                \
 111       X##_f[_i] = 0;                                    \
 112     S = (_s != 0);                                      \
 113   } while (0)
 114
 115 #define _FP_FRAC_SRS_4(X,N,size)                \
 116   do {                                          \
 117     int _sticky;                                \
 118     _FP_FRAC_SRST_4(X, _sticky, N, size);       \
 119     X##_f[0] |= _sticky;                        \
 120   } while (0)
 121
 122 #define _FP_FRAC_ADD_4(R,X,Y)                                           \
 123   __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
 124                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 125                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 126
 127 #define _FP_FRAC_SUB_4(R,X,Y)                                           \
 128   __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
 129                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 130                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 131
 132 #define _FP_FRAC_DEC_4(X,Y)                                             \
 133   __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 134                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 135
 136 #define _FP_FRAC_ADDI_4(X,I)                                            \
 137   __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
 138
 139 #define _FP_ZEROFRAC_4  0,0,0,0
 140 #define _FP_MINFRAC_4   0,0,0,1
 141 #define _FP_MAXFRAC_4   (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
 142
 143 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
 144 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
 145 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 146 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
 147
 148 #define _FP_FRAC_EQ_4(X,Y)                              \
 149  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
 150   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
 151
 152 #define _FP_FRAC_GT_4(X,Y)                              \
 153  (X##_f[3] > Y##_f[3] ||                                \
 154   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
 155    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
 156     (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
 157    ))                                                   \
 158   ))                                                    \
 159  )
 160
 161 #define _FP_FRAC_GE_4(X,Y)                              \
 162  (X##_f[3] > Y##_f[3] ||                                \
 163   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
 164    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
 165     (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
 166    ))                                                   \
 167   ))                                                    \
 168  )
 169
 170
 171 #define _FP_FRAC_CLZ_4(R,X)             \
 172   do {                                  \
 173     if (X##_f[3])                       \
 174     {                                   \
 175         __FP_CLZ(R,X##_f[3]);           \
 176     }                                   \
 177     else if (X##_f[2])                  \
 178     {                                   \
 179         __FP_CLZ(R,X##_f[2]);           \
 180         R += _FP_W_TYPE_SIZE;           \
 181     }                                   \
 182     else if (X##_f[1])                  \
 183     {                                   \
 184         __FP_CLZ(R,X##_f[1]);           \
 185         R += _FP_W_TYPE_SIZE*2;         \
 186     }                                   \
 187     else                                \
 188     {                                   \
 189         __FP_CLZ(R,X##_f[0]);           \
 190         R += _FP_W_TYPE_SIZE*3;         \
 191     }                                   \
 192   } while(0)
 193
 194
 195 #define _FP_UNPACK_RAW_4(fs, X, val)                            \
 196   do {                                                          \
 197     union _FP_UNION_##fs _flo; _flo.flt = (val);                \
 198     X##_f[0] = _flo.bits.frac0;                                 \
 199     X##_f[1] = _flo.bits.frac1;                                 \
 200     X##_f[2] = _flo.bits.frac2;                                 \
 201     X##_f[3] = _flo.bits.frac3;                                 \
 202     X##_e  = _flo.bits.exp;                                     \
 203     X##_s  = _flo.bits.sign;                                    \
 204   } while (0)
 205
 206 #define _FP_UNPACK_RAW_4_P(fs, X, val)                          \
 207   do {                                                          \
 208     union _FP_UNION_##fs *_flo =                                \
 209       (union _FP_UNION_##fs *)(val);                            \
 210                                                                 \
 211     X##_f[0] = _flo->bits.frac0;                                \
 212     X##_f[1] = _flo->bits.frac1;                                \
 213     X##_f[2] = _flo->bits.frac2;                                \
 214     X##_f[3] = _flo->bits.frac3;                                \
 215     X##_e  = _flo->bits.exp;                                    \
 216     X##_s  = _flo->bits.sign;                                   \
 217   } while (0)
 218
 219 #define _FP_PACK_RAW_4(fs, val, X)                              \
 220   do {                                                          \
 221     union _FP_UNION_##fs _flo;                                  \
 222     _flo.bits.frac0 = X##_f[0];                                 \
 223     _flo.bits.frac1 = X##_f[1];                                 \
 224     _flo.bits.frac2 = X##_f[2];                                 \
 225     _flo.bits.frac3 = X##_f[3];                                 \
 226     _flo.bits.exp   = X##_e;                                    \
 227     _flo.bits.sign  = X##_s;                                    \
 228     (val) = _flo.flt;                                           \
 229   } while (0)
 230
 231 #define _FP_PACK_RAW_4_P(fs, val, X)                            \
 232   do {                                                          \
 233     union _FP_UNION_##fs *_flo =                                \
 234       (union _FP_UNION_##fs *)(val);                            \
 235                                                                 \
 236     _flo->bits.frac0 = X##_f[0];                                \
 237     _flo->bits.frac1 = X##_f[1];                                \
 238     _flo->bits.frac2 = X##_f[2];                                \
 239     _flo->bits.frac3 = X##_f[3];                                \
 240     _flo->bits.exp   = X##_e;                                   \
 241     _flo->bits.sign  = X##_s;                                   \
 242   } while (0)
 243
 244 /*
 245  * Multiplication algorithms:
 246  */
 247
  248 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
         `doit' is that primitive.  It is invoked as doit(w1, w0, a, b),
         once for each of the 16 word pairs X##_f[i], Y##_f[j], with w1/w0
         receiving the two result words (presumably high word first, as in
         the very first call which fills words 1 and 0 of _z).
         The partial products are accumulated into the eight-word
         temporary _z with __FP_FRAC_ADD_3 / __FP_FRAC_ADD_2.  _z is then
         shifted right by wfracbits-1 bits with _FP_FRAC_SRS_8 and its
         words 3..0 are stored into R with __FP_FRAC_SET_4.
         X and Y are only read.  */
  249
  250 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                       \
  251   do {                                                                      \
  252     _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);          \
  253     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);          \
  254                                                                             \
  255     doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
  256     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                 \
  257     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                 \
  258     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                 \
  259     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                 \
  260     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                 \
  261     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
  262                     _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,                   \
  263                     0,0,_FP_FRAC_WORD_8(_z,1));                             \
  264     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
  265                     _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,                   \
  266                     _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
  267                     _FP_FRAC_WORD_8(_z,1));                                 \
  268     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
  269                     _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,                   \
  270                     0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));         \
  271     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
  272                     _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,                   \
  273                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
  274                     _FP_FRAC_WORD_8(_z,2));                                 \
  275     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
  276                     _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,                   \
  277                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
  278                     _FP_FRAC_WORD_8(_z,2));                                 \
  279     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                 \
  280     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                 \
  281     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                 \
  282     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                 \
  283     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  284                     _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,                   \
  285                     0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));         \
  286     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  287                     _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,                   \
  288                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  289                     _FP_FRAC_WORD_8(_z,3));                                 \
  290     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  291                     _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,                   \
  292                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  293                     _FP_FRAC_WORD_8(_z,3));                                 \
  294     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  295                     _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,                   \
  296                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
  297                     _FP_FRAC_WORD_8(_z,3));                                 \
  298     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                 \
  299     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                 \
  300     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                 \
  301     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                 \
  302     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                 \
  303     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
  304                     _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,                   \
  305                     0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));         \
  306     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
  307                     _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,                   \
  308                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
  309                     _FP_FRAC_WORD_8(_z,4));                                 \
  310     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
  311                     _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,                   \
  312                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
  313                     _FP_FRAC_WORD_8(_z,4));                                 \
  314     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
  315                     _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,                   \
  316                     0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));         \
  317     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
  318                     _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,                   \
  319                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
  320                     _FP_FRAC_WORD_8(_z,5));                                 \
  321     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                 \
  322     __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
  323                     _b_f1,_b_f0,                                            \
  324                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
  325                                                                             \
  326     /* Normalize since we know where the msb of the multiplicands           \
  327        were (bit B), we know that the msb of the product is                 \
  328        at either 2B or 2B-1.  */                                            \
  329     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
  330     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
  331                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
  332   } while (0)
 333
 334 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
 335   do {                                                                      \
 336     _FP_FRAC_DECL_8(_z);                                                    \
 337                                                                             \
 338     mpn_mul_n(_z_f, _x_f, _y_f, 4);                                         \
 339                                                                             \
 340     /* Normalize since we know where the msb of the multiplicands           \
 341        were (bit B), we know that the msb of the of the product is          \
 342        at either 2B or 2B-1.  */                                            \
 343     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
 344     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
 345                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
 346   } while (0)
 347
 348 /*
 349  * Helper utility for _FP_DIV_MEAT_4_udiv:
 350  * pppp = m * nnn
 351  */
 352 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                               \
 353   do {                                                                      \
 354     UWtype _t;                                                              \
 355     umul_ppmm(p1,p0,m,n0);                                                  \
 356     umul_ppmm(p2,_t,m,n1);                                                  \
 357     __FP_FRAC_ADDI_2(p2,p1,_t);                                             \
 358     umul_ppmm(p3,_t,m,n2);                                                  \
 359     __FP_FRAC_ADDI_2(p3,p2,_t);                                             \
 360   } while (0)
 361
  362 /*
  363  * Division algorithms:
       *
       * _FP_DIV_MEAT_4_udiv(fs, R, X, Y) produces the four quotient words
       * R##_f[3] down to R##_f[0], one word per loop iteration.
       *
       * Setup: when X > Y, X is shifted right by one bit and the bit that
       * falls off is kept in the top bit of _n_f[3]; otherwise R##_e is
       * decremented.  Y is then shifted left by _FP_WFRACXBITS_<fs> bits.
       * The words of _n are fed into the low word of X as the loop runs.
       *
       * Each iteration either takes the special path for
       * X##_f[3] == Y##_f[3] (quotient word set to -1, corrected to -2 if
       * needed), or estimates the quotient word with udiv_qrnnd on the top
       * words, multiplies it back with umul_ppppmnnn and decrements the
       * estimate (at most twice) while the product _m is larger than X.
       * On the last word of that second path _FP_WORK_STICKY is ORed into
       * R##_f[0] when X and _m differ after the final subtraction.
       *
       * X, Y and R##_e are modified.
  364  */
  365
  366 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
  367   do {                                                                      \
  368     int _i;                                                                 \
  369     _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
  370     _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
  371     if (_FP_FRAC_GT_4(X, Y))                                                \
  372       {                                                                     \
  373         _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
  374         _FP_FRAC_SRL_4(X, 1);                                               \
  375       }                                                                     \
  376     else                                                                    \
  377       R##_e--;                                                              \
  378                                                                             \
  379     /* Normalize, i.e. make the most significant bit of the                 \
  380        denominator set. */                                                  \
  381     _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
  382                                                                             \
  383     for (_i = 3; ; _i--)                                                    \
  384       {                                                                     \
  385         if (X##_f[3] == Y##_f[3])                                           \
  386           {                                                                 \
  387             /* This is a special case, not an optimization                  \
  388                (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
  389                As X## is guaranteed to be < Y,  R##_f[_i] can be either     \
  390                (UWtype)-1 or (UWtype)-2.  */                                \
  391             R##_f[_i] = -1;                                                 \
  392             if (!_i)                                                        \
  393               break;                                                        \
  394             __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
  395                             Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
  396                             X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
  397             _FP_FRAC_SUB_4(X, Y, X);                                        \
  398             if (X##_f[3] > Y##_f[3])                                        \
  399               {                                                             \
  400                 R##_f[_i] = -2;                                             \
  401                 _FP_FRAC_ADD_4(X, Y, X);                                    \
  402               }                                                             \
  403           }                                                                 \
  404         else                                                                \
  405           {                                                                 \
  406             udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
  407             umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],               \
  408                           R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
  409             X##_f[2] = X##_f[1];                                            \
  410             X##_f[1] = X##_f[0];                                            \
  411             X##_f[0] = _n_f[_i];                                            \
  412             if (_FP_FRAC_GT_4(_m, X))                                       \
  413               {                                                             \
  414                 R##_f[_i]--;                                                \
  415                 _FP_FRAC_ADD_4(X, Y, X);                                    \
  416                 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
  417                   {                                                         \
  418                     R##_f[_i]--;                                            \
  419                     _FP_FRAC_ADD_4(X, Y, X);                                \
  420                   }                                                         \
  421               }                                                             \
  422             _FP_FRAC_DEC_4(X, _m);                                          \
  423             if (!_i)                                                        \
  424               {                                                             \
  425                 if (!_FP_FRAC_EQ_4(X, _m))                                  \
  426                   R##_f[0] |= _FP_WORK_STICKY;                              \
  427                 break;                                                      \
  428               }                                                             \
  429           }                                                                 \
  430       }                                                                     \
  431   } while (0)
 432
 433
  434 /*
  435  * Square root algorithms:
  436  * We have just one right now, maybe Newton approximation
  437  * should be added for those machines where division is fast.
       *
       * _FP_SQRT_MEAT_4(R, S, T, X, q) runs four bit-by-bit loops, one for
       * each result word from R##_f[3] down to R##_f[0].  In every step q
       * holds the single bit being tried in the current word:
       *   - T is formed as S with q added to the current word;
       *   - if T does not exceed X (compared over the words handled so
       *     far), T is subtracted from X, S becomes T plus q (with the
       *     carry propagated into the higher words of S) and q is added to
       *     the current word of R;
       *   - X is shifted left by one bit and q right by one bit.
       * The first loop starts from the q supplied by the caller; for the
       * lower words q restarts at the top bit of a word, and the last loop
       * stops when q reaches _FP_WORK_ROUND.
       * If X is non-zero afterwards, _FP_WORK_STICKY is ORed into R##_f[0],
       * together with _FP_WORK_ROUND when X is greater than S.
       *
       * R, S, T, X and q are all modified.
  438  */
  439
  440 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
  441   do {                                                          \
  442     while (q)                                                   \
  443       {                                                         \
  444         T##_f[3] = S##_f[3] + q;                                \
  445         if (T##_f[3] <= X##_f[3])                               \
  446           {                                                     \
  447             S##_f[3] = T##_f[3] + q;                            \
  448             X##_f[3] -= T##_f[3];                               \
  449             R##_f[3] += q;                                      \
  450           }                                                     \
  451         _FP_FRAC_SLL_4(X, 1);                                   \
  452         q >>= 1;                                                \
  453       }                                                         \
  454     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
  455     while (q)                                                   \
  456       {                                                         \
  457         T##_f[2] = S##_f[2] + q;                                \
  458         T##_f[3] = S##_f[3];                                    \
  459         if (T##_f[3] < X##_f[3] ||                              \
  460             (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
  461           {                                                     \
  462             S##_f[2] = T##_f[2] + q;                            \
  463             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
  464             __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
  465                             T##_f[3], T##_f[2]);                \
  466             R##_f[2] += q;                                      \
  467           }                                                     \
  468         _FP_FRAC_SLL_4(X, 1);                                   \
  469         q >>= 1;                                                \
  470       }                                                         \
  471     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
  472     while (q)                                                   \
  473       {                                                         \
  474         T##_f[1] = S##_f[1] + q;                                \
  475         T##_f[2] = S##_f[2];                                    \
  476         T##_f[3] = S##_f[3];                                    \
  477         if (T##_f[3] < X##_f[3] ||                              \
  478             (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
  479              (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
  480           {                                                     \
  481             S##_f[1] = T##_f[1] + q;                            \
  482             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
  483             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
  484             __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
  485                             T##_f[3], T##_f[2], T##_f[1]);      \
  486             R##_f[1] += q;                                      \
  487           }                                                     \
  488         _FP_FRAC_SLL_4(X, 1);                                   \
  489         q >>= 1;                                                \
  490       }                                                         \
  491     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
  492     while (q != _FP_WORK_ROUND)                                 \
  493       {                                                         \
  494         T##_f[0] = S##_f[0] + q;                                \
  495         T##_f[1] = S##_f[1];                                    \
  496         T##_f[2] = S##_f[2];                                    \
  497         T##_f[3] = S##_f[3];                                    \
  498         if (_FP_FRAC_GE_4(X,T))                                 \
  499           {                                                     \
  500             S##_f[0] = T##_f[0] + q;                            \
  501             S##_f[1] += (T##_f[0] > S##_f[0]);                  \
  502             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
  503             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
  504             _FP_FRAC_DEC_4(X, T);                               \
  505             R##_f[0] += q;                                      \
  506           }                                                     \
  507         _FP_FRAC_SLL_4(X, 1);                                   \
  508         q >>= 1;                                                \
  509       }                                                         \
  510     if (!_FP_FRAC_ZEROP_4(X))                                   \
  511       {                                                         \
  512         if (_FP_FRAC_GT_4(X,S))                                 \
  513           R##_f[0] |= _FP_WORK_ROUND;                           \
  514         R##_f[0] |= _FP_WORK_STICKY;                            \
  515       }                                                         \
  516   } while (0)
 517
 518
 519 /*
 520  * Internals
 521  */
 522
 523 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
 524   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
 525
 526 #ifndef __FP_FRAC_ADD_3
 527 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
 528   do {                                                          \
 529     _FP_W_TYPE _c1, _c2;                                        \
 530     r0 = x0 + y0;                                               \
 531     _c1 = r0 < x0;                                              \
 532     r1 = x1 + y1;                                               \
 533     _c2 = r1 < x1;                                              \
 534     r1 += _c1;                                                  \
 535     _c2 |= r1 < _c1;                                            \
 536     r2 = x2 + y2 + _c2;                                         \
 537   } while (0)
 538 #endif
 539
 540 #ifndef __FP_FRAC_ADD_4
 541 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
 542   do {                                                          \
 543     _FP_W_TYPE _c1, _c2, _c3;                                   \
 544     r0 = x0 + y0;                                               \
 545     _c1 = r0 < x0;                                              \
 546     r1 = x1 + y1;                                               \
 547     _c2 = r1 < x1;                                              \
 548     r1 += _c1;                                                  \
 549     _c2 |= r1 < _c1;                                            \
 550     r2 = x2 + y2;                                               \
 551     _c3 = r2 < x2;                                              \
 552     r2 += _c2;                                                  \
 553     _c3 |= r2 < _c2;                                            \
 554     r3 = x3 + y3 + _c3;                                         \
 555   } while (0)
 556 #endif
 557
 558 #ifndef __FP_FRAC_SUB_3
 559 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
 560   do {                                                          \
 561     _FP_W_TYPE _c1, _c2;                                        \
 562     r0 = x0 - y0;                                               \
 563     _c1 = r0 > x0;                                              \
 564     r1 = x1 - y1;                                               \
 565     _c2 = r1 > x1;                                              \
 566     r1 -= _c1;                                                  \
 567     _c2 |= _c1 && (y1 == x1);                                   \
 568     r2 = x2 - y2 - _c2;                                         \
 569   } while (0)
 570 #endif
 571
 572 #ifndef __FP_FRAC_SUB_4
 573 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
 574   do {                                                          \
 575     _FP_W_TYPE _c1, _c2, _c3;                                   \
 576     r0 = x0 - y0;                                               \
 577     _c1 = r0 > x0;                                              \
 578     r1 = x1 - y1;                                               \
 579     _c2 = r1 > x1;                                              \
 580     r1 -= _c1;                                                  \
 581     _c2 |= _c1 && (y1 == x1);                                   \
 582     r2 = x2 - y2;                                               \
 583     _c3 = r2 > x2;                                              \
 584     r2 -= _c2;                                                  \
 585     _c3 |= _c2 && (y2 == x2);                                   \
 586     r3 = x3 - y3 - _c3;                                         \
 587   } while (0)
 588 #endif
 589
 590 #ifndef __FP_FRAC_DEC_3
 591 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
 592   do {                                                                  \
 593     UWtype _t0, _t1, _t2;                                               \
 594     _t0 = x0, _t1 = x1, _t2 = x2;                                       \
 595     __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
 596   } while (0)
 597 #endif
 598
 599 #ifndef __FP_FRAC_DEC_4
 600 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
 601   do {                                                                  \
 602     UWtype _t0, _t1, _t2, _t3;                                          \
 603     _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
 604     __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
 605   } while (0)
 606 #endif
 607
 608 #ifndef __FP_FRAC_ADDI_4
 609 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
 610   do {                                                                  \
 611     UWtype _t;                                                          \
 612     _t = ((x0 += i) < i);                                               \
 613     x1 += _t; _t = (x1 < _t);                                           \
 614     x2 += _t; _t = (x2 < _t);                                           \
 615     x3 += _t;                                                           \
 616   } while (0)
 617 #endif
 618
 619 /* Convert FP values between word sizes. This appears to be more
 620  * complicated than I'd have expected it to be, so these might be
 621  * wrong... These macros are in any case somewhat bogus because they
 622  * use information about what various FRAC_n variables look like
 * internally [e.g., that 2-word vars are X_f0 and X_f1]. But so do
 624  * the ones in op-2.h and op-1.h.
 625  */
 626 #define _FP_FRAC_COPY_1_4(D, S)         (D##_f = S##_f[0])
 627
 628 #define _FP_FRAC_COPY_2_4(D, S)                 \
 629 do {                                            \
 630   D##_f0 = S##_f[0];                            \
 631   D##_f1 = S##_f[1];                            \
 632 } while (0)
 633
 634 /* Assembly/disassembly for converting to/from integral types.
 635  * No shifting or overflow handled here.
 636  */
 637 /* Put the FP value X into r, which is an integer of size rsize. */
 638 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
 639   do {                                                                  \
 640     if (rsize <= _FP_W_TYPE_SIZE)                                       \
 641       r = X##_f[0];                                                     \
 642     else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
 643     {                                                                   \
 644       r = X##_f[1];                                                     \
 645       r <<= _FP_W_TYPE_SIZE;                                            \
 646       r += X##_f[0];                                                    \
 647     }                                                                   \
 648     else                                                                \
 649     {                                                                   \
 650       /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
 651       /* and int == 4words as a single case.                     */     \
 652       r = X##_f[3];                                                     \
 653       r <<= _FP_W_TYPE_SIZE;                                            \
 654       r += X##_f[2];                                                    \
 655       r <<= _FP_W_TYPE_SIZE;                                            \
 656       r += X##_f[1];                                                    \
 657       r <<= _FP_W_TYPE_SIZE;                                            \
 658       r += X##_f[0];                                                    \
 659     }                                                                   \
 660   } while (0)
 661
 662 /* "No disassemble Number Five!" */
 663 /* move an integer of size rsize into X's fractional part. We rely on
 664  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
 665  * having to mask the values we store into it.
 666  */
 667 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
 668   do {                                                                  \
 669     X##_f[0] = r;                                                       \
 670     X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
 671     X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
 672     X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
 673   } while (0);
 674
 675 #define _FP_FRAC_COPY_4_1(D, S)                 \
 676 do {                                            \
 677   D##_f[0] = S##_f;                             \
 678   D##_f[1] = D##_f[2] = D##_f[3] = 0;           \
 679 } while (0)
 680
 681 #define _FP_FRAC_COPY_4_2(D, S)                 \
 682 do {                                            \
 683   D##_f[0] = S##_f0;                            \
 684   D##_f[1] = S##_f1;                            \
 685   D##_f[2] = D##_f[3] = 0;                      \
 686 } while (0)
 687
 688 #define _FP_FRAC_COPY_4_4(D,S)  _FP_FRAC_COPY_4(D,S)