soft-fp/op-4.h

   1 /* Software floating-point emulation.
   2    Basic four-word fraction declaration and manipulation.
   3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
  34 #define _FP_FRAC_COPY_4(D, S)                   \
  35   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
  36    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
  37 #define _FP_FRAC_SET_4(X, I)    __FP_FRAC_SET_4 (X, I)
  38 #define _FP_FRAC_HIGH_4(X)      (X##_f[3])
  39 #define _FP_FRAC_LOW_4(X)       (X##_f[0])
  40 #define _FP_FRAC_WORD_4(X, w)   (X##_f[w])
  41
  42 #define _FP_FRAC_SLL_4(X, N)                            \
  43   do                                                    \
  44     {                                                   \
  45       _FP_I_TYPE _up, _down, _skip, _i;                 \
  46       _skip = (N) / _FP_W_TYPE_SIZE;                    \
  47       _up = (N) % _FP_W_TYPE_SIZE;                      \
  48       _down = _FP_W_TYPE_SIZE - _up;                    \
  49       if (!_up)                                         \
  50         for (_i = 3; _i >= _skip; --_i)                 \
  51           X##_f[_i] = X##_f[_i-_skip];                  \
  52       else                                              \
  53         {                                               \
  54           for (_i = 3; _i > _skip; --_i)                \
  55             X##_f[_i] = (X##_f[_i-_skip] << _up         \
  56                          | X##_f[_i-_skip-1] >> _down); \
  57           X##_f[_i--] = X##_f[0] << _up;                \
  58         }                                               \
  59       for (; _i >= 0; --_i)                             \
  60         X##_f[_i] = 0;                                  \
  61     }                                                   \
  62   while (0)
  63
  64 /* This one was broken too */
  65 #define _FP_FRAC_SRL_4(X, N)                            \
  66   do                                                    \
  67     {                                                   \
  68       _FP_I_TYPE _up, _down, _skip, _i;                 \
  69       _skip = (N) / _FP_W_TYPE_SIZE;                    \
  70       _down = (N) % _FP_W_TYPE_SIZE;                    \
  71       _up = _FP_W_TYPE_SIZE - _down;                    \
  72       if (!_down)                                       \
  73         for (_i = 0; _i <= 3-_skip; ++_i)               \
  74           X##_f[_i] = X##_f[_i+_skip];                  \
  75       else                                              \
  76         {                                               \
  77           for (_i = 0; _i < 3-_skip; ++_i)              \
  78             X##_f[_i] = (X##_f[_i+_skip] >> _down       \
  79                          | X##_f[_i+_skip+1] << _up);   \
  80           X##_f[_i++] = X##_f[3] >> _down;              \
  81         }                                               \
  82       for (; _i < 4; ++_i)                              \
  83         X##_f[_i] = 0;                                  \
  84     }                                                   \
  85   while (0)
  86
  87
  88 /* Right shift with sticky-lsb.
  89  * What this actually means is that we do a standard right-shift,
  90  * but that if any of the bits that fall off the right hand side
  91  * were one then we always set the LSbit.
  92  */
  93 #define _FP_FRAC_SRST_4(X, S, N, size)                  \
  94   do                                                    \
  95     {                                                   \
  96       _FP_I_TYPE _up, _down, _skip, _i;                 \
  97       _FP_W_TYPE _s;                                    \
  98       _skip = (N) / _FP_W_TYPE_SIZE;                    \
  99       _down = (N) % _FP_W_TYPE_SIZE;                    \
 100       _up = _FP_W_TYPE_SIZE - _down;                    \
 101       for (_s = _i = 0; _i < _skip; ++_i)               \
 102         _s |= X##_f[_i];                                \
 103       if (!_down)                                       \
 104         for (_i = 0; _i <= 3-_skip; ++_i)               \
 105           X##_f[_i] = X##_f[_i+_skip];                  \
 106       else                                              \
 107         {                                               \
 108           _s |= X##_f[_i] << _up;                       \
 109           for (_i = 0; _i < 3-_skip; ++_i)              \
 110             X##_f[_i] = (X##_f[_i+_skip] >> _down       \
 111                          | X##_f[_i+_skip+1] << _up);   \
 112           X##_f[_i++] = X##_f[3] >> _down;              \
 113         }                                               \
 114       for (; _i < 4; ++_i)                              \
 115         X##_f[_i] = 0;                                  \
 116       S = (_s != 0);                                    \
 117     }                                                   \
 118   while (0)
 119
 120 #define _FP_FRAC_SRS_4(X, N, size)              \
 121   do                                            \
 122     {                                           \
 123       int _sticky;                              \
 124       _FP_FRAC_SRST_4 (X, _sticky, N, size);    \
 125       X##_f[0] |= _sticky;                      \
 126     }                                           \
 127   while (0)
 128
 129 #define _FP_FRAC_ADD_4(R, X, Y)                                 \
 130   __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0],      \
 131                    X##_f[3], X##_f[2], X##_f[1], X##_f[0],      \
 132                    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 133
 134 #define _FP_FRAC_SUB_4(R, X, Y)                                 \
 135   __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0],      \
 136                    X##_f[3], X##_f[2], X##_f[1], X##_f[0],      \
 137                    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 138
 139 #define _FP_FRAC_DEC_4(X, Y)                                    \
 140   __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0],      \
 141                    Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 142
 143 #define _FP_FRAC_ADDI_4(X, I)                                   \
 144   __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
 145
 146 #define _FP_ZEROFRAC_4  0, 0, 0, 0
 147 #define _FP_MINFRAC_4   0, 0, 0, 1
 148 #define _FP_MAXFRAC_4   (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
 149
 150 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
 151 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE) X##_f[3] < 0)
 152 #define _FP_FRAC_OVERP_4(fs, X)  (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
 153 #define _FP_FRAC_HIGHBIT_DW_4(fs, X)    \
 154   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
 155 #define _FP_FRAC_CLEAR_OVERP_4(fs, X)  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
 156
 157 #define _FP_FRAC_EQ_4(X, Y)                             \
 158   (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]         \
 159    && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
 160
 161 #define _FP_FRAC_GT_4(X, Y)                             \
 162   (X##_f[3] > Y##_f[3]                                  \
 163    || (X##_f[3] == Y##_f[3]                             \
 164        && (X##_f[2] > Y##_f[2]                          \
 165            || (X##_f[2] == Y##_f[2]                     \
 166                && (X##_f[1] > Y##_f[1]                  \
 167                    || (X##_f[1] == Y##_f[1]             \
 168                        && X##_f[0] > Y##_f[0]))))))
 169
 170 #define _FP_FRAC_GE_4(X, Y)                             \
 171   (X##_f[3] > Y##_f[3]                                  \
 172    || (X##_f[3] == Y##_f[3]                             \
 173        && (X##_f[2] > Y##_f[2]                          \
 174            || (X##_f[2] == Y##_f[2]                     \
 175                && (X##_f[1] > Y##_f[1]                  \
 176                    || (X##_f[1] == Y##_f[1]             \
 177                        && X##_f[0] >= Y##_f[0]))))))
 178
 179
 180 #define _FP_FRAC_CLZ_4(R, X)                    \
 181   do                                            \
 182     {                                           \
 183       if (X##_f[3])                             \
 184         __FP_CLZ (R, X##_f[3]);                 \
 185       else if (X##_f[2])                        \
 186         {                                       \
 187           __FP_CLZ (R, X##_f[2]);               \
 188           R += _FP_W_TYPE_SIZE;                 \
 189         }                                       \
 190       else if (X##_f[1])                        \
 191         {                                       \
 192           __FP_CLZ (R, X##_f[1]);               \
 193           R += _FP_W_TYPE_SIZE*2;               \
 194         }                                       \
 195       else                                      \
 196         {                                       \
 197           __FP_CLZ (R, X##_f[0]);               \
 198           R += _FP_W_TYPE_SIZE*3;               \
 199         }                                       \
 200     }                                           \
 201   while (0)
 202
 203
 204 #define _FP_UNPACK_RAW_4(fs, X, val)            \
 205   do                                            \
 206     {                                           \
 207       union _FP_UNION_##fs _flo;                \
 208       _flo.flt = (val);                         \
 209       X##_f[0] = _flo.bits.frac0;               \
 210       X##_f[1] = _flo.bits.frac1;               \
 211       X##_f[2] = _flo.bits.frac2;               \
 212       X##_f[3] = _flo.bits.frac3;               \
 213       X##_e  = _flo.bits.exp;                   \
 214       X##_s  = _flo.bits.sign;                  \
 215     }                                           \
 216   while (0)
 217
 218 #define _FP_UNPACK_RAW_4_P(fs, X, val)                                  \
 219   do                                                                    \
 220     {                                                                   \
 221       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
 222                                                                         \
 223       X##_f[0] = _flo->bits.frac0;                                      \
 224       X##_f[1] = _flo->bits.frac1;                                      \
 225       X##_f[2] = _flo->bits.frac2;                                      \
 226       X##_f[3] = _flo->bits.frac3;                                      \
 227       X##_e  = _flo->bits.exp;                                          \
 228       X##_s  = _flo->bits.sign;                                         \
 229     }                                                                   \
 230   while (0)
 231
 232 #define _FP_PACK_RAW_4(fs, val, X)              \
 233   do                                            \
 234     {                                           \
 235       union _FP_UNION_##fs _flo;                \
 236       _flo.bits.frac0 = X##_f[0];               \
 237       _flo.bits.frac1 = X##_f[1];               \
 238       _flo.bits.frac2 = X##_f[2];               \
 239       _flo.bits.frac3 = X##_f[3];               \
 240       _flo.bits.exp   = X##_e;                  \
 241       _flo.bits.sign  = X##_s;                  \
 242       (val) = _flo.flt;                         \
 243     }                                           \
 244   while (0)
 245
 246 #define _FP_PACK_RAW_4_P(fs, val, X)                                    \
 247   do                                                                    \
 248     {                                                                   \
 249       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
 250                                                                         \
 251       _flo->bits.frac0 = X##_f[0];                                      \
 252       _flo->bits.frac1 = X##_f[1];                                      \
 253       _flo->bits.frac2 = X##_f[2];                                      \
 254       _flo->bits.frac3 = X##_f[3];                                      \
 255       _flo->bits.exp   = X##_e;                                         \
 256       _flo->bits.sign  = X##_s;                                         \
 257     }                                                                   \
 258   while (0)
 259
 260 /*
 261  * Multiplication algorithms:
 262  */
 263
 264 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 265
 266 #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit)                \
 267   do                                                                    \
 268     {                                                                   \
 269       _FP_FRAC_DECL_2 (_b);                                             \
 270       _FP_FRAC_DECL_2 (_c);                                             \
 271       _FP_FRAC_DECL_2 (_d);                                             \
 272       _FP_FRAC_DECL_2 (_e);                                             \
 273       _FP_FRAC_DECL_2 (_f);                                             \
 274                                                                         \
 275       doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), X##_f[0], Y##_f[0]); \
 276       doit (_b_f1, _b_f0, X##_f[0], Y##_f[1]);                          \
 277       doit (_c_f1, _c_f0, X##_f[1], Y##_f[0]);                          \
 278       doit (_d_f1, _d_f0, X##_f[1], Y##_f[1]);                          \
 279       doit (_e_f1, _e_f0, X##_f[0], Y##_f[2]);                          \
 280       doit (_f_f1, _f_f0, X##_f[2], Y##_f[0]);                          \
 281       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),  \
 282                        _FP_FRAC_WORD_8 (R, 1), 0, _b_f1, _b_f0,         \
 283                        0, 0, _FP_FRAC_WORD_8 (R, 1));                   \
 284       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),  \
 285                        _FP_FRAC_WORD_8 (R, 1), 0, _c_f1, _c_f0,         \
 286                        _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),  \
 287                        _FP_FRAC_WORD_8 (R, 1));                         \
 288       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),  \
 289                        _FP_FRAC_WORD_8 (R, 2), 0, _d_f1, _d_f0,         \
 290                        0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
 291       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),  \
 292                        _FP_FRAC_WORD_8 (R, 2), 0, _e_f1, _e_f0,         \
 293                        _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),  \
 294                        _FP_FRAC_WORD_8 (R, 2));                         \
 295       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),  \
 296                        _FP_FRAC_WORD_8 (R, 2), 0, _f_f1, _f_f0,         \
 297                        _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),  \
 298                        _FP_FRAC_WORD_8 (R, 2));                         \
 299       doit (_b_f1, _b_f0, X##_f[0], Y##_f[3]);                          \
 300       doit (_c_f1, _c_f0, X##_f[3], Y##_f[0]);                          \
 301       doit (_d_f1, _d_f0, X##_f[1], Y##_f[2]);                          \
 302       doit (_e_f1, _e_f0, X##_f[2], Y##_f[1]);                          \
 303       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 304                        _FP_FRAC_WORD_8 (R, 3), 0, _b_f1, _b_f0,         \
 305                        0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
 306       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 307                        _FP_FRAC_WORD_8 (R, 3), 0, _c_f1, _c_f0,         \
 308                        _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 309                        _FP_FRAC_WORD_8 (R, 3));                         \
 310       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 311                        _FP_FRAC_WORD_8 (R, 3), 0, _d_f1, _d_f0,         \
 312                        _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 313                        _FP_FRAC_WORD_8 (R, 3));                         \
 314       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 315                        _FP_FRAC_WORD_8 (R, 3), 0, _e_f1, _e_f0,         \
 316                        _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),  \
 317                        _FP_FRAC_WORD_8 (R, 3));                         \
 318       doit (_b_f1, _b_f0, X##_f[2], Y##_f[2]);                          \
 319       doit (_c_f1, _c_f0, X##_f[1], Y##_f[3]);                          \
 320       doit (_d_f1, _d_f0, X##_f[3], Y##_f[1]);                          \
 321       doit (_e_f1, _e_f0, X##_f[2], Y##_f[3]);                          \
 322       doit (_f_f1, _f_f0, X##_f[3], Y##_f[2]);                          \
 323       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),  \
 324                        _FP_FRAC_WORD_8 (R, 4), 0, _b_f1, _b_f0,         \
 325                        0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
 326       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),  \
 327                        _FP_FRAC_WORD_8 (R, 4), 0, _c_f1, _c_f0,         \
 328                        _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),  \
 329                        _FP_FRAC_WORD_8 (R, 4));                         \
 330       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),  \
 331                        _FP_FRAC_WORD_8 (R, 4), 0, _d_f1, _d_f0,         \
 332                        _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),  \
 333                        _FP_FRAC_WORD_8 (R, 4));                         \
 334       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),  \
 335                        _FP_FRAC_WORD_8 (R, 5), 0, _e_f1, _e_f0,         \
 336                        0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
 337       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),  \
 338                        _FP_FRAC_WORD_8 (R, 5), 0, _f_f1, _f_f0,         \
 339                        _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),  \
 340                        _FP_FRAC_WORD_8 (R, 5));                         \
 341       doit (_b_f1, _b_f0, X##_f[3], Y##_f[3]);                          \
 342       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),  \
 343                        _b_f1, _b_f0,                                    \
 344                        _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
 345     }                                                                   \
 346   while (0)
 347
 348 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                   \
 349   do                                                                    \
 350     {                                                                   \
 351       _FP_FRAC_DECL_8 (_z);                                             \
 352                                                                         \
 353       _FP_MUL_MEAT_DW_4_wide (wfracbits, _z, X, Y, doit);               \
 354                                                                         \
 355       /* Normalize since we know where the msb of the multiplicands     \
 356          were (bit B), we know that the msb of the of the product is    \
 357          at either 2B or 2B-1.  */                                      \
 358       _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits);                    \
 359       __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \
 360                        _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \
 361     }                                                                   \
 362   while (0)
 363
 364 #define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y)       \
 365   do                                                    \
 366     {                                                   \
 367       mpn_mul_n (R##_f, _x_f, _y_f, 4);                 \
 368     }                                                   \
 369   while (0)
 370
 371 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                          \
 372   do                                                                    \
 373     {                                                                   \
 374       _FP_FRAC_DECL_8 (_z);                                             \
 375                                                                         \
 376       _FP_MUL_MEAT_DW_4_gmp (wfracbits, _z, X, Y);                      \
 377                                                                         \
 378       /* Normalize since we know where the msb of the multiplicands     \
 379          were (bit B), we know that the msb of the of the product is    \
 380          at either 2B or 2B-1.  */                                      \
 381       _FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits);                    \
 382       __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \
 383                        _FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \
 384     }                                                                   \
 385   while (0)
 386
 387 /*
 388  * Helper utility for _FP_DIV_MEAT_4_udiv:
 389  * pppp = m * nnn
 390  */
 391 #define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0)    \
 392   do                                                    \
 393     {                                                   \
 394       UWtype _t;                                        \
 395       umul_ppmm (p1, p0, m, n0);                        \
 396       umul_ppmm (p2, _t, m, n1);                        \
 397       __FP_FRAC_ADDI_2 (p2, p1, _t);                    \
 398       umul_ppmm (p3, _t, m, n2);                        \
 399       __FP_FRAC_ADDI_2 (p3, p2, _t);                    \
 400     }                                                   \
 401   while (0)
 402
 403 /*
 404  * Division algorithms:
 405  */
 406
 407 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                \
 408   do                                                                    \
 409     {                                                                   \
 410       int _i;                                                           \
 411       _FP_FRAC_DECL_4 (_n);                                             \
 412       _FP_FRAC_DECL_4 (_m);                                             \
 413       _FP_FRAC_SET_4 (_n, _FP_ZEROFRAC_4);                              \
 414       if (_FP_FRAC_GE_4 (X, Y))                                         \
 415         {                                                               \
 416           _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                  \
 417           _FP_FRAC_SRL_4 (X, 1);                                        \
 418         }                                                               \
 419       else                                                              \
 420         R##_e--;                                                        \
 421                                                                         \
 422       /* Normalize, i.e. make the most significant bit of the           \
 423          denominator set. */                                            \
 424       _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs);                          \
 425                                                                         \
 426       for (_i = 3; ; _i--)                                              \
 427         {                                                               \
 428           if (X##_f[3] == Y##_f[3])                                     \
 429             {                                                           \
 430               /* This is a special case, not an optimization            \
 431                  (X##_f[3]/Y##_f[3] would not fit into UWtype).         \
 432                  As X## is guaranteed to be < Y,  R##_f[_i] can be either \
 433                  (UWtype)-1 or (UWtype)-2.  */                          \
 434               R##_f[_i] = -1;                                           \
 435               if (!_i)                                                  \
 436                 break;                                                  \
 437               __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0],  \
 438                                Y##_f[2], Y##_f[1], Y##_f[0], 0,         \
 439                                X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
 440               _FP_FRAC_SUB_4 (X, Y, X);                                 \
 441               if (X##_f[3] > Y##_f[3])                                  \
 442                 {                                                       \
 443                   R##_f[_i] = -2;                                       \
 444                   _FP_FRAC_ADD_4 (X, Y, X);                             \
 445                 }                                                       \
 446             }                                                           \
 447           else                                                          \
 448             {                                                           \
 449               udiv_qrnnd (R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
 450               umul_ppppmnnn (_m_f[3], _m_f[2], _m_f[1], _m_f[0],        \
 451                              R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);  \
 452               X##_f[2] = X##_f[1];                                      \
 453               X##_f[1] = X##_f[0];                                      \
 454               X##_f[0] = _n_f[_i];                                      \
 455               if (_FP_FRAC_GT_4 (_m, X))                                \
 456                 {                                                       \
 457                   R##_f[_i]--;                                          \
 458                   _FP_FRAC_ADD_4 (X, Y, X);                             \
 459                   if (_FP_FRAC_GE_4 (X, Y) && _FP_FRAC_GT_4 (_m, X))    \
 460                     {                                                   \
 461                       R##_f[_i]--;                                      \
 462                       _FP_FRAC_ADD_4 (X, Y, X);                         \
 463                     }                                                   \
 464                 }                                                       \
 465               _FP_FRAC_DEC_4 (X, _m);                                   \
 466               if (!_i)                                                  \
 467                 {                                                       \
 468                   if (!_FP_FRAC_EQ_4 (X, _m))                           \
 469                     R##_f[0] |= _FP_WORK_STICKY;                        \
 470                   break;                                                \
 471                 }                                                       \
 472             }                                                           \
 473         }                                                               \
 474     }                                                                   \
 475   while (0)
 476
 477
 478 /*
 479  * Square root algorithms:
 480  * We have just one right now, maybe Newton approximation
 481  * should be added for those machines where division is fast.
 482  */
 483
 484 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                                  \
 485   do                                                                    \
 486     {                                                                   \
 487       while (q)                                                         \
 488         {                                                               \
 489           T##_f[3] = S##_f[3] + q;                                      \
 490           if (T##_f[3] <= X##_f[3])                                     \
 491             {                                                           \
 492               S##_f[3] = T##_f[3] + q;                                  \
 493               X##_f[3] -= T##_f[3];                                     \
 494               R##_f[3] += q;                                            \
 495             }                                                           \
 496           _FP_FRAC_SLL_4 (X, 1);                                        \
 497           q >>= 1;                                                      \
 498         }                                                               \
 499       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);                      \
 500       while (q)                                                         \
 501         {                                                               \
 502           T##_f[2] = S##_f[2] + q;                                      \
 503           T##_f[3] = S##_f[3];                                          \
 504           if (T##_f[3] < X##_f[3]                                       \
 505               || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))        \
 506             {                                                           \
 507               S##_f[2] = T##_f[2] + q;                                  \
 508               S##_f[3] += (T##_f[2] > S##_f[2]);                        \
 509               __FP_FRAC_DEC_2 (X##_f[3], X##_f[2],                      \
 510                                T##_f[3], T##_f[2]);                     \
 511               R##_f[2] += q;                                            \
 512             }                                                           \
 513           _FP_FRAC_SLL_4 (X, 1);                                        \
 514           q >>= 1;                                                      \
 515         }                                                               \
 516       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);                      \
 517       while (q)                                                         \
 518         {                                                               \
 519           T##_f[1] = S##_f[1] + q;                                      \
 520           T##_f[2] = S##_f[2];                                          \
 521           T##_f[3] = S##_f[3];                                          \
 522           if (T##_f[3] < X##_f[3]                                       \
 523               || (T##_f[3] == X##_f[3]                                  \
 524                   && (T##_f[2] < X##_f[2]                               \
 525                       || (T##_f[2] == X##_f[2]                          \
 526                           && T##_f[1] <= X##_f[1]))))                   \
 527             {                                                           \
 528               S##_f[1] = T##_f[1] + q;                                  \
 529               S##_f[2] += (T##_f[1] > S##_f[1]);                        \
 530               S##_f[3] += (T##_f[2] > S##_f[2]);                        \
 531               __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1],            \
 532                                T##_f[3], T##_f[2], T##_f[1]);           \
 533               R##_f[1] += q;                                            \
 534             }                                                           \
 535           _FP_FRAC_SLL_4 (X, 1);                                        \
 536           q >>= 1;                                                      \
 537         }                                                               \
 538       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);                      \
 539       while (q != _FP_WORK_ROUND)                                       \
 540         {                                                               \
 541           T##_f[0] = S##_f[0] + q;                                      \
 542           T##_f[1] = S##_f[1];                                          \
 543           T##_f[2] = S##_f[2];                                          \
 544           T##_f[3] = S##_f[3];                                          \
 545           if (_FP_FRAC_GE_4 (X, T))                                     \
 546             {                                                           \
 547               S##_f[0] = T##_f[0] + q;                                  \
 548               S##_f[1] += (T##_f[0] > S##_f[0]);                        \
 549               S##_f[2] += (T##_f[1] > S##_f[1]);                        \
 550               S##_f[3] += (T##_f[2] > S##_f[2]);                        \
 551               _FP_FRAC_DEC_4 (X, T);                                    \
 552               R##_f[0] += q;                                            \
 553             }                                                           \
 554           _FP_FRAC_SLL_4 (X, 1);                                        \
 555           q >>= 1;                                                      \
 556         }                                                               \
 557       if (!_FP_FRAC_ZEROP_4 (X))                                        \
 558         {                                                               \
 559           if (_FP_FRAC_GT_4 (X, S))                                     \
 560             R##_f[0] |= _FP_WORK_ROUND;                                 \
 561           R##_f[0] |= _FP_WORK_STICKY;                                  \
 562         }                                                               \
 563     }                                                                   \
 564   while (0)
 565
 566
 567 /*
 568  * Internals
 569  */
 570
 571 #define __FP_FRAC_SET_4(X, I3, I2, I1, I0)                      \
 572   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
 573
 574 #ifndef __FP_FRAC_ADD_3
 575 # define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0)    \
 576   do                                                            \
 577     {                                                           \
 578       _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2;        \
 579       r0 = x0 + y0;                                             \
 580       __FP_FRAC_ADD_3_c1 = r0 < x0;                             \
 581       r1 = x1 + y1;                                             \
 582       __FP_FRAC_ADD_3_c2 = r1 < x1;                             \
 583       r1 += __FP_FRAC_ADD_3_c1;                                 \
 584       __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1;            \
 585       r2 = x2 + y2 + __FP_FRAC_ADD_3_c2;                        \
 586     }                                                           \
 587   while (0)
 588 #endif
 589
 590 #ifndef __FP_FRAC_ADD_4
 591 # define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
 592   do                                                                    \
 593     {                                                                   \
 594       _FP_W_TYPE _c1, _c2, _c3;                                         \
 595       r0 = x0 + y0;                                                     \
 596       _c1 = r0 < x0;                                                    \
 597       r1 = x1 + y1;                                                     \
 598       _c2 = r1 < x1;                                                    \
 599       r1 += _c1;                                                        \
 600       _c2 |= r1 < _c1;                                                  \
 601       r2 = x2 + y2;                                                     \
 602       _c3 = r2 < x2;                                                    \
 603       r2 += _c2;                                                        \
 604       _c3 |= r2 < _c2;                                                  \
 605       r3 = x3 + y3 + _c3;                                               \
 606     }                                                                   \
 607   while (0)
 608 #endif
 609
 610 #ifndef __FP_FRAC_SUB_3
 611 # define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0)    \
 612   do                                                            \
 613     {                                                           \
 614       _FP_W_TYPE _c1, _c2;                                      \
 615       r0 = x0 - y0;                                             \
 616       _c1 = r0 > x0;                                            \
 617       r1 = x1 - y1;                                             \
 618       _c2 = r1 > x1;                                            \
 619       r1 -= _c1;                                                \
 620       _c2 |= _c1 && (y1 == x1);                                 \
 621       r2 = x2 - y2 - _c2;                                       \
 622     }                                                           \
 623   while (0)
 624 #endif
 625
 626 #ifndef __FP_FRAC_SUB_4
 627 # define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
 628   do                                                                    \
 629     {                                                                   \
 630       _FP_W_TYPE _c1, _c2, _c3;                                         \
 631       r0 = x0 - y0;                                                     \
 632       _c1 = r0 > x0;                                                    \
 633       r1 = x1 - y1;                                                     \
 634       _c2 = r1 > x1;                                                    \
 635       r1 -= _c1;                                                        \
 636       _c2 |= _c1 && (y1 == x1);                                         \
 637       r2 = x2 - y2;                                                     \
 638       _c3 = r2 > x2;                                                    \
 639       r2 -= _c2;                                                        \
 640       _c3 |= _c2 && (y2 == x2);                                         \
 641       r3 = x3 - y3 - _c3;                                               \
 642     }                                                                   \
 643   while (0)
 644 #endif
 645
 646 #ifndef __FP_FRAC_DEC_3
 647 # define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0)                \
 648   do                                                            \
 649     {                                                           \
 650       UWtype _t0, _t1, _t2;                                     \
 651       _t0 = x0, _t1 = x1, _t2 = x2;                             \
 652       __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);  \
 653     }                                                           \
 654   while (0)
 655 #endif
 656
 657 #ifndef __FP_FRAC_DEC_4
 658 # define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0)                \
 659   do                                                                    \
 660     {                                                                   \
 661       UWtype _t0, _t1, _t2, _t3;                                        \
 662       _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                           \
 663       __FP_FRAC_SUB_4 (x3, x2, x1, x0, _t3, _t2, _t1, _t0, y3, y2, y1, y0); \
 664     }                                                                   \
 665   while (0)
 666 #endif
 667
 668 #ifndef __FP_FRAC_ADDI_4
 669 # define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i)    \
 670   do                                            \
 671     {                                           \
 672       UWtype _t;                                \
 673       _t = ((x0 += i) < i);                     \
 674       x1 += _t;                                 \
 675       _t = (x1 < _t);                           \
 676       x2 += _t;                                 \
 677       _t = (x2 < _t);                           \
 678       x3 += _t;                                 \
 679     }                                           \
 680   while (0)
 681 #endif
 682
 683 /* Convert FP values between word sizes. This appears to be more
 684  * complicated than I'd have expected it to be, so these might be
 685  * wrong... These macros are in any case somewhat bogus because they
 686  * use information about what various FRAC_n variables look like
 687  * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
 688  * the ones in op-2.h and op-1.h.
 689  */
 690 #define _FP_FRAC_COPY_1_4(D, S)         (D##_f = S##_f[0])
 691
 692 #define _FP_FRAC_COPY_2_4(D, S)                 \
 693   do                                            \
 694     {                                           \
 695       D##_f0 = S##_f[0];                        \
 696       D##_f1 = S##_f[1];                        \
 697     }                                           \
 698   while (0)
 699
 700 /* Assembly/disassembly for converting to/from integral types.
 701  * No shifting or overflow handled here.
 702  */
 703 /* Put the FP value X into r, which is an integer of size rsize. */
 704 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
 705   do                                                                    \
 706     {                                                                   \
 707       if (rsize <= _FP_W_TYPE_SIZE)                                     \
 708         r = X##_f[0];                                                   \
 709       else if (rsize <= 2*_FP_W_TYPE_SIZE)                              \
 710         {                                                               \
 711           r = X##_f[1];                                                 \
 712           r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);    \
 713           r += X##_f[0];                                                \
 714         }                                                               \
 715       else                                                              \
 716         {                                                               \
 717           /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
 718           /* and int == 4words as a single case.                         */ \
 719           r = X##_f[3];                                                 \
 720           r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);    \
 721           r += X##_f[2];                                                \
 722           r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);    \
 723           r += X##_f[1];                                                \
 724           r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);    \
 725           r += X##_f[0];                                                \
 726         }                                                               \
 727     }                                                                   \
 728   while (0)
 729
 730 /* "No disassemble Number Five!" */
 731 /* move an integer of size rsize into X's fractional part. We rely on
 732  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
 733  * having to mask the values we store into it.
 734  */
 735 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
 736   do                                                                    \
 737     {                                                                   \
 738       X##_f[0] = r;                                                     \
 739       X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
 740       X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
 741       X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
 742     }                                                                   \
 743   while (0)
 744
 745 #define _FP_FRAC_COPY_4_1(D, S)                 \
 746   do                                            \
 747     {                                           \
 748       D##_f[0] = S##_f;                         \
 749       D##_f[1] = D##_f[2] = D##_f[3] = 0;       \
 750     }                                           \
 751   while (0)
 752
 753 #define _FP_FRAC_COPY_4_2(D, S)                 \
 754   do                                            \
 755     {                                           \
 756       D##_f[0] = S##_f0;                        \
 757       D##_f[1] = S##_f1;                        \
 758       D##_f[2] = D##_f[3] = 0;                  \
 759     }                                           \
 760   while (0)
 761
 762 #define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)