stdlib/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, see
  29    <http://www.gnu.org/licenses/>.  */
  30
  31 /* You have to define the following before including this file:
  32
  33    UWtype -- An unsigned type, default type for operations (typically a "word")
  34    UHWtype -- An unsigned type, at least half the size of UWtype.
  35    UDWtype -- An unsigned type, at least twice as large as UWtype
  36    W_TYPE_SIZE -- size in bits of UWtype
  37
  38    UQItype -- Unsigned 8 bit type.
  39    SItype, USItype -- Signed and unsigned 32 bit types.
  40    DItype, UDItype -- Signed and unsigned 64 bit types.
  41
  42    On a 32 bit machine UWtype should typically be USItype;
  43    on a 64 bit machine, UWtype should typically be UDItype.  */
  44
  45 #define __BITS4 (W_TYPE_SIZE / 4)
  46 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
  47 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
  48 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  49
     /* Helpers above: __BITS4 is a quarter of the word size in bits, __ll_B
        is 2^(W_TYPE_SIZE/2), __ll_lowpart masks a value down to its low
        half word and __ll_highpart shifts the high half word down.  They
        are only expanded where they are used, so the fallback definitions
        below still apply to them although they come later in the file.

        Fallback configuration when the includer did not define
        W_TYPE_SIZE: a 32-bit word, USItype for both the word and the
        half-word type, and UDItype for the double word.  */
  50 #ifndef W_TYPE_SIZE
  51 #define W_TYPE_SIZE     32
  52 #define UWtype          USItype
  53 #define UHWtype         USItype
  54 #define UDWtype         UDItype
  55 #endif
  56
  57 /* Used in glibc only.  */
  58 #ifndef attribute_hidden
  59 #define attribute_hidden
  60 #endif
  61
     /* Byte-indexed lookup table (256 entries) consulted by the table-driven
        count_leading_zeros variants in this file, e.g. the non-CIX Alpha
        one.  It is only declared here; the definition lives elsewhere.
        NOTE(review): from the way it is used, an entry appears to hold the
        bit length of its index (position of the highest set bit plus one)
        -- confirm against the table's definition.  */
  62 extern const UQItype __clz_tab[256] attribute_hidden;
  63
  64 /* Define auxiliary asm macros.
  65
  66    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  67    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  68    word product in HIGH_PROD and LOW_PROD.
  69
  70    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  71    UDWtype product.  This is just a variant of umul_ppmm.
  72
  73    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  74    denominator) divides a UDWtype, composed by the UWtype integers
  75    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  76    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  77    than DENOMINATOR for correct operation.  If, in addition, the most
  78    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  79    UDIV_NEEDS_NORMALIZATION is defined to 1.
  80
  81    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  82    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  83    is rounded towards 0.
  84
  85    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  86    msb to the first nonzero bit in the UWtype X.  This is the number of
  87    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  88    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  89
  90    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  91    from the least significant end.
  92
  93    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
  94    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
  95    by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  96    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  97    (i.e. carry out) is not stored anywhere, and is lost.
  98
  99    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 100    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 101    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 102    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 103    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
 104    and is lost.
 105
 106    If any of these macros are left undefined for a particular CPU,
 107    C macros are used.  */
 108
 109 /* The CPUs come in alphabetical order below.
 110
 111    Please add support for more CPUs here, or improve the current support
 112    for the CPUs below!
 113    (E.g. WE32100, IBM360.)  */
 114
 115 #if defined (__GNUC__) && !defined (NO_ASM)
 116
 117 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 118    understood by gcc1.  Use cpp to avoid major code duplication.  */
     /* __CLOBBER_CC expands to a whole clobber section (: "cc") for asm
        statements that have no other clobbers; __AND_CLOBBER_CC expands to
        one more list entry (, "cc") for statements that already name
        clobbers.  Both expand to nothing when __GNUC__ < 2.  */
 119 #if __GNUC__ < 2
 120 #define __CLOBBER_CC
 121 #define __AND_CLOBBER_CC
 122 #else /* __GNUC__ >= 2 */
 123 #define __CLOBBER_CC : "cc"
 124 #define __AND_CLOBBER_CC , "cc"
 125 #endif /* __GNUC__ < 2 */
 126
 127 #if defined (__alpha) && W_TYPE_SIZE == 64
     /* Alpha with a 64-bit word.  umul_ppmm takes the high product word
        from __builtin_alpha_umulh and the low word from an ordinary
        (truncating) multiply.  Both operands are first copied into UDItype
        locals, so each argument is evaluated once.  */
 128 #define umul_ppmm(ph, pl, m0, m1) \
 129   do {                                                                  \
 130     UDItype __m0 = (m0), __m1 = (m1);                                   \
 131     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
 132     (pl) = __m0 * __m1;                                                 \
 133   } while (0)
 134 #define UMUL_TIME 46
 135 #ifndef LONGLONG_STANDALONE
     /* Division is done out of line: __udiv_qrnnd returns the quotient and
        stores the remainder through its first argument.  This variant is
        not provided when LONGLONG_STANDALONE is defined.  */
 136 #define udiv_qrnnd(q, r, n1, n0, d) \
 137   do { UDItype __r;                                                     \
 138     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
 139     (r) = __r;                                                          \
 140   } while (0)
 141 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
 142 #define UDIV_TIME 220
 143 #endif /* LONGLONG_STANDALONE */
 144 #ifdef __alpha_cix__
     /* With __alpha_cix__ defined the compiler builtins are used directly;
        COUNT_LEADING_ZEROS_0 declares 64 as the count for a zero input.  */
 145 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
 146 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
 147 #define COUNT_LEADING_ZEROS_0 64
 148 #else
     /* Otherwise the search is done a byte at a time.  cmpbge against zero
        yields a per-byte mask (NOTE(review): presumably one bit per zero
        byte of X -- confirm against the Alpha builtin documentation), from
        which the index __a of the interesting byte is derived; extbl then
        extracts that byte and the bit position inside it is resolved.
        count_leading_zeros uses __clz_tab for both steps.
        count_trailing_zeros isolates the lowest set bit with "t & -t" and
        decodes its index with the 0xCC / 0xF0 / 0xAA masks, once for the
        byte index and once for the bit within the byte.  */
 149 #define count_leading_zeros(COUNT,X) \
 150   do {                                                                  \
 151     UDItype __xr = (X), __t, __a;                                       \
 152     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 153     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
 154     __t = __builtin_alpha_extbl (__xr, __a);                            \
 155     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
 156   } while (0)
 157 #define count_trailing_zeros(COUNT,X) \
 158   do {                                                                  \
 159     UDItype __xr = (X), __t, __a;                                       \
 160     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 161     __t = ~__t & -~__t;                                                 \
 162     __a = ((__t & 0xCC) != 0) * 2;                                      \
 163     __a += ((__t & 0xF0) != 0) * 4;                                     \
 164     __a += ((__t & 0xAA) != 0);                                         \
 165     __t = __builtin_alpha_extbl (__xr, __a);                            \
 166     __a <<= 3;                                                          \
 167     __t &= -__t;                                                        \
 168     __a += ((__t & 0xCC) != 0) * 2;                                     \
 169     __a += ((__t & 0xF0) != 0) * 4;                                     \
 170     __a += ((__t & 0xAA) != 0);                                         \
 171     (COUNT) = __a;                                                      \
 172   } while (0)
 173 #endif /* __alpha_cix__ */
 174 #endif /* __alpha */
 175
 176 #if defined (__arc__) && W_TYPE_SIZE == 32
     /* ARC with a 32-bit word.  add_ssaaaa adds the low words first
        (operands %4 and %5 into %1) and then the high words with adc;
        sub_ddmmss does the same with sub.f / sbc.  The low result is an
        earlyclobber output ("=&r") because it is written while the
        high-word inputs are still needed.  The second operand of each pair
        may also be an immediate accepted by the "I" or "J" constraints.  */
 177 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 178   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 179            : "=r" ((USItype) (sh)),                                     \
 180              "=&r" ((USItype) (sl))                                     \
 181            : "%r" ((USItype) (ah)),                                     \
 182              "rIJ" ((USItype) (bh)),                                    \
 183              "%r" ((USItype) (al)),                                     \
 184              "rIJ" ((USItype) (bl)))
 185 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 186   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 187            : "=r" ((USItype) (sh)),                                     \
 188              "=&r" ((USItype) (sl))                                     \
 189            : "r" ((USItype) (ah)),                                      \
 190              "rIJ" ((USItype) (bh)),                                    \
 191              "r" ((USItype) (al)),                                      \
 192              "rIJ" ((USItype) (bl)))
 193 /* Call libgcc routine.  */
     /* umul_ppmm obtains the double-word product from __umulsidi3 and
        splits it through a DWunion (declared elsewhere) into its high and
        low members.  The self-referential #define lets later "#ifdef
        __umulsidi3" style tests see that the function is available.
        NOTE(review): w1, w0, u and v are used unparenthesized here, unlike
        in the other implementations -- callers should pass simple
        lvalues and expressions.  */
 194 #define umul_ppmm(w1, w0, u, v) \
 195 do {                                                                    \
 196   DWunion __w;                                                          \
 197   __w.ll = __umulsidi3 (u, v);                                          \
 198   w1 = __w.s.high;                                                      \
 199   w0 = __w.s.low;                                                       \
 200 } while (0)
 201 #define __umulsidi3 __umulsidi3
 202 UDItype __umulsidi3 (USItype, USItype);
 203 #endif
 204
 205 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
 206  && W_TYPE_SIZE == 32
     /* ARM (not Thumb-1) with a 32-bit word.  Two-word add and subtract:
        the low words are combined first with adds / subs, then the high
        words with adc / sbc.  The low result is an earlyclobber output
        because it is written before the high-word inputs are read.  The
        second operand of each pair may be an immediate accepted by the "I"
        constraint.  The condition codes are declared clobbered through
        __CLOBBER_CC.  */
 207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 209            : "=r" ((USItype) (sh)),                                     \
 210              "=&r" ((USItype) (sl))                                     \
 211            : "%r" ((USItype) (ah)),                                     \
 212              "rI" ((USItype) (bh)),                                     \
 213              "%r" ((USItype) (al)),                                     \
 214              "rI" ((USItype) (bl)) __CLOBBER_CC)
 215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 217            : "=r" ((USItype) (sh)),                                     \
 218              "=&r" ((USItype) (sl))                                     \
 219            : "r" ((USItype) (ah)),                                      \
 220              "rI" ((USItype) (bh)),                                     \
 221              "r" ((USItype) (al)),                                      \
 222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
 224      || defined(__ARM_ARCH_3__)
     /* ARMv2 / v2a / v3: build the 64-bit product from 16-bit halves.  Each
        operand is split with "lsr #16" and "bic", four partial products
        are formed with mul, and the two cross products are summed and
        folded into the low and high result words with carry propagation
        (adds / addcs / adc).  __t0..__t2 are scratch registers; the
        condition codes are clobbered.  */
 225 #  define umul_ppmm(xh, xl, a, b)                                       \
 226   do {                                                                  \
 227     register USItype __t0, __t1, __t2;                                  \
 228     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
 229            "    mov     %2, %5, lsr #16\n"                              \
 230            "    mov     %0, %6, lsr #16\n"                              \
 231            "    bic     %3, %5, %2, lsl #16\n"                          \
 232            "    bic     %4, %6, %0, lsl #16\n"                          \
 233            "    mul     %1, %3, %4\n"                                   \
 234            "    mul     %4, %2, %4\n"                                   \
 235            "    mul     %3, %0, %3\n"                                   \
 236            "    mul     %0, %2, %0\n"                                   \
 237            "    adds    %3, %4, %3\n"                                   \
 238            "    addcs   %0, %0, #65536\n"                               \
 239            "    adds    %1, %1, %3, lsl #16\n"                          \
 240            "    adc     %0, %0, %3, lsr #16"                            \
 241            : "=&r" ((USItype) (xh)),                                    \
 242              "=r" ((USItype) (xl)),                                     \
 243              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 244            : "r" ((USItype) (a)),                                       \
 245              "r" ((USItype) (b)) __CLOBBER_CC );                        \
 246   } while (0)
 247 #  define UMUL_TIME 20
 248 # else
 249 #  define umul_ppmm(xh, xl, a, b)                                       \
 250   do {                                                                  \
 251     /* Generate umull, under compiler control.  */                      \
 252     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
 253     (xl) = (USItype)__t0;                                               \
 254     (xh) = (USItype)(__t0 >> 32);                                       \
 255   } while (0)
 256 #  define UMUL_TIME 3
 257 # endif
 258 # define UDIV_TIME 100
 259 #endif /* __arm__ */
 260
 261 #if defined(__arm__)
     /* Applies to every __arm__ target: this conditional tests neither
        Thumb mode nor W_TYPE_SIZE.  COUNT_LEADING_ZEROS_0 advertises 32 as
        the count for a zero input.
        NOTE(review): that relies on __builtin_clz (0) yielding 32 on this
        target -- confirm, since GCC documents the result for 0 as
        undefined.  */
 262 /* Let gcc decide how best to implement count_leading_zeros.  */
 263 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 264 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
 265 #define COUNT_LEADING_ZEROS_0 32
 266 #endif
 267
 268 #if defined (__AVR__)
     /* AVR: pick the clz / ctz builtin whose operand type matches the word
        size -- the int variants for 16 bits, the long variants for 32 bits
        and the long long variants for 64 bits -- and advertise the word
        size as the leading-zero count of 0.
        NOTE(review): assumes int, long and long long are 16, 32 and 64
        bits wide on this target -- confirm against the AVR ABI.  */
 269
 270 #if W_TYPE_SIZE == 16
 271 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 272 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
 273 #define COUNT_LEADING_ZEROS_0 16
 274 #endif /* W_TYPE_SIZE == 16 */
 275
 276 #if W_TYPE_SIZE == 32
 277 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
 278 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
 279 #define COUNT_LEADING_ZEROS_0 32
 280 #endif /* W_TYPE_SIZE == 32 */
 281
 282 #if W_TYPE_SIZE == 64
 283 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
 284 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
 285 #define COUNT_LEADING_ZEROS_0 64
 286 #endif /* W_TYPE_SIZE == 64 */
 287
 288 #endif /* defined (__AVR__) */
 289
 290 #if defined (__CRIS__) && __CRIS_arch_version >= 3
     /* CRIS: count_leading_zeros is mapped to the builtin from architecture
        version 3 on, count_trailing_zeros only from version 8 on.  No
        COUNT_LEADING_ZEROS_0 is defined here, so a zero input is
        undefined.  */
 291 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
 292 #if __CRIS_arch_version >= 8
 293 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
 294 #endif
 295 #endif /* __CRIS__ */
 296
 297 #if defined (__hppa) && W_TYPE_SIZE == 32
     /* HPPA with a 32-bit word.  Two-word add and subtract: the low words
        go through add / sub and the high words through addc / subb, with
        the destination as the last asm operand.  The low result is an
        earlyclobber output because it is written before the high-word
        inputs are read.  Inputs may also satisfy the "M" constraint.  */
 298 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 299   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
 300            : "=r" ((USItype) (sh)),                                     \
 301              "=&r" ((USItype) (sl))                                     \
 302            : "%rM" ((USItype) (ah)),                                    \
 303              "rM" ((USItype) (bh)),                                     \
 304              "%rM" ((USItype) (al)),                                    \
 305              "rM" ((USItype) (bl)))
 306 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 307   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
 308            : "=r" ((USItype) (sh)),                                     \
 309              "=&r" ((USItype) (sl))                                     \
 310            : "rM" ((USItype) (ah)),                                     \
 311              "rM" ((USItype) (bh)),                                     \
 312              "rM" ((USItype) (al)),                                     \
 313              "rM" ((USItype) (bl)))
 314 #if defined (_PA_RISC1_1)
     /* Only with _PA_RISC1_1: xmpyu produces the 64-bit product in a
        register of constraint class "x"; it is read back through a union
        as the high word (__w1, first member) and the low word (__w0).  */
 315 #define umul_ppmm(w1, w0, u, v) \
 316   do {                                                                  \
 317     union                                                               \
 318       {                                                                 \
 319         UDItype __f;                                                    \
 320         struct {USItype __w1, __w0;} __w1w0;                            \
 321       } __t;                                                            \
 322     __asm__ ("xmpyu %1,%2,%0"                                           \
 323              : "=x" (__t.__f)                                           \
 324              : "x" ((USItype) (u)),                                     \
 325                "x" ((USItype) (v)));                                    \
 326     (w1) = __t.__w1w0.__w1;                                             \
 327     (w0) = __t.__w1w0.__w0;                                             \
 328      } while (0)
 329 #define UMUL_TIME 8
 330 #else
 331 #define UMUL_TIME 30
 332 #endif
 333 #define UDIV_TIME 40
     /* count_leading_zeros as a binary search in assembly: the count starts
        at 1 and 16, 8, 4 and 2 are added whenever the upper part under
        test is zero; otherwise that part is shifted down instead.  Bit 1
        of what remains is subtracted at the end.  The input is tied to the
        scratch output __tmp ("1" constraint), so the working copy is
        modified rather than X itself.  */
 334 #define count_leading_zeros(count, x) \
 335   do {                                                                  \
 336     USItype __tmp;                                                      \
 337     __asm__ (                                                           \
 338        "ldi             1,%0\n"                                         \
 339 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
 340 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
 341 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
 342 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
 343 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
 344 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
 345 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
 346 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
 347 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
 348 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
 349 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
 350 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
 351 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
 352 "       sub             %0,%1,%0                ; Subtract it.\n"       \
 353         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
 354   } while (0)
 355 #endif
 356
 357 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
     /* i370 / s390 / MVS with a 32-bit word.  Only the signed primitives
        smul_ppmm and sdiv_qrnnd are provided here.  */
 358 #if !defined (__zarch__)
     /* Without __zarch__ the 64-bit operand is a DItype overlaid with its
        high (__h, first member) and low (__l) words.  smul_ppmm copies M0
        into one half with lr and multiplies with mr.  sdiv_qrnnd loads
        N1:N0 into the union and divides with dr, which leaves the quotient
        in __l and the remainder in __h.
        NOTE(review): %N0 presumably names the second register of the pair
        holding operand 0 -- confirm against the s390 operand modifiers.  */
 359 #define smul_ppmm(xh, xl, m0, m1) \
 360   do {                                                                  \
 361     union {DItype __ll;                                                 \
 362            struct {USItype __h, __l;} __i;                              \
 363           } __x;                                                        \
 364     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
 365              : "=&r" (__x.__ll)                                         \
 366              : "r" (m0), "r" (m1));                                     \
 367     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
 368   } while (0)
 369 #define sdiv_qrnnd(q, r, n1, n0, d) \
 370   do {                                                                  \
 371     union {DItype __ll;                                                 \
 372            struct {USItype __h, __l;} __i;                              \
 373           } __x;                                                        \
 374     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
 375     __asm__ ("dr %0,%2"                                                 \
 376              : "=r" (__x.__ll)                                          \
 377              : "0" (__x.__ll), "r" (d));                                \
 378     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
 379   } while (0)
 380 #else
     /* With __zarch__ the pair is spelled out with explicit register
        variables bound to registers "0" and "1", and the instructions name
        %r0 directly.  After mr, __r0 is the high and __r1 the low product
        word; after dr, __r1 is the quotient and __r0 the remainder.  */
 381 #define smul_ppmm(xh, xl, m0, m1) \
 382   do {                                                                  \
 383     register SItype __r0 __asm__ ("0");                                 \
 384     register SItype __r1 __asm__ ("1") = (m0);                          \
 385                                                                         \
 386     __asm__ ("mr\t%%r0,%3"                                              \
 387              : "=r" (__r0), "=r" (__r1)                                 \
 388              : "r"  (__r1),  "r" (m1));                                 \
 389     (xh) = __r0; (xl) = __r1;                                           \
 390   } while (0)
 391
 392 #define sdiv_qrnnd(q, r, n1, n0, d) \
 393   do {                                                                  \
 394     register SItype __r0 __asm__ ("0") = (n1);                          \
 395     register SItype __r1 __asm__ ("1") = (n0);                          \
 396                                                                         \
 397     __asm__ ("dr\t%%r0,%4"                                              \
 398              : "=r" (__r0), "=r" (__r1)                                 \
 399              : "r" (__r0), "r" (__r1), "r" (d));                        \
 400     (q) = __r1; (r) = __r0;                                             \
 401   } while (0)
 402 #endif /* __zarch__ */
 403 #endif
 404
 405 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
     /* 32-bit x86.  The asm templates use the {att|intel} dialect
        alternatives.  In add_ssaaaa and sub_ddmmss the first operand of
        each pair is tied to the matching output ("0" / "1" constraints),
        so the instructions work in place: add / sub on the low words, then
        adc / sbb on the high words.  */
 406 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 407   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
 408            : "=r" ((USItype) (sh)),                                     \
 409              "=&r" ((USItype) (sl))                                     \
 410            : "%0" ((USItype) (ah)),                                     \
 411              "g" ((USItype) (bh)),                                      \
 412              "%1" ((USItype) (al)),                                     \
 413              "g" ((USItype) (bl)))
 414 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 415   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
 416            : "=r" ((USItype) (sh)),                                     \
 417              "=&r" ((USItype) (sl))                                     \
 418            : "0" ((USItype) (ah)),                                      \
 419              "g" ((USItype) (bh)),                                      \
 420              "1" ((USItype) (al)),                                      \
 421              "g" ((USItype) (bl)))
     /* umul_ppmm: U is placed in the "a" register (tied to the low output)
        and multiplied by V; the low word comes back in "a" and the high
        word in "d".  udiv_qrnnd: the dividend N1:N0 is passed in "d":"a";
        the quotient comes back in "a" and the remainder in "d".  */
 422 #define umul_ppmm(w1, w0, u, v) \
 423   __asm__ ("mul{l} %3"                                                  \
 424            : "=a" ((USItype) (w0)),                                     \
 425              "=d" ((USItype) (w1))                                      \
 426            : "%0" ((USItype) (u)),                                      \
 427              "rm" ((USItype) (v)))
 428 #define udiv_qrnnd(q, r, n1, n0, dv) \
 429   __asm__ ("div{l} %4"                                                  \
 430            : "=a" ((USItype) (q)),                                      \
 431              "=d" ((USItype) (r))                                       \
 432            : "0" ((USItype) (n0)),                                      \
 433              "1" ((USItype) (n1)),                                      \
 434              "rm" ((USItype) (dv)))
     /* Bit counts come from the compiler builtins.  No
        COUNT_LEADING_ZEROS_0 is defined in this section, so a zero input
        is undefined.  */
 435 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
 436 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
 437 #define UMUL_TIME 40
 438 #define UDIV_TIME 40
 439 #endif /* 80x86 */
 440
 441 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
     /* x86 with a 64-bit word: the same scheme as the 32-bit x86 section,
        using the {q} instruction forms and UDItype operands.  The second
        operand of each add / sub pair may be a register, memory, or an
        immediate accepted by the "e" constraint.
        NOTE(review): the condition also admits __i386__ with W_TYPE_SIZE
        == 64, although the templates use 64-bit instructions -- confirm
        that this combination is intended.  */
 442 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 443   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
 444            : "=r" ((UDItype) (sh)),                                     \
 445              "=&r" ((UDItype) (sl))                                     \
 446            : "%0" ((UDItype) (ah)),                                     \
 447              "rme" ((UDItype) (bh)),                                    \
 448              "%1" ((UDItype) (al)),                                     \
 449              "rme" ((UDItype) (bl)))
 450 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 451   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
 452            : "=r" ((UDItype) (sh)),                                     \
 453              "=&r" ((UDItype) (sl))                                     \
 454            : "0" ((UDItype) (ah)),                                      \
 455              "rme" ((UDItype) (bh)),                                    \
 456              "1" ((UDItype) (al)),                                      \
 457              "rme" ((UDItype) (bl)))
     /* umul_ppmm: U goes in the "a" register; the low product word comes
        back in "a" and the high word in "d".  udiv_qrnnd: the dividend
        N1:N0 is passed in "d":"a"; the quotient comes back in "a" and the
        remainder in "d".  */
 458 #define umul_ppmm(w1, w0, u, v) \
 459   __asm__ ("mul{q} %3"                                                  \
 460            : "=a" ((UDItype) (w0)),                                     \
 461              "=d" ((UDItype) (w1))                                      \
 462            : "%0" ((UDItype) (u)),                                      \
 463              "rm" ((UDItype) (v)))
 464 #define udiv_qrnnd(q, r, n1, n0, dv) \
 465   __asm__ ("div{q} %4"                                                  \
 466            : "=a" ((UDItype) (q)),                                      \
 467              "=d" ((UDItype) (r))                                       \
 468            : "0" ((UDItype) (n0)),                                      \
 469              "1" ((UDItype) (n1)),                                      \
 470              "rm" ((UDItype) (dv)))
     /* Bit counts use the long long builtins.  No COUNT_LEADING_ZEROS_0 is
        defined in this section, so a zero input is undefined.  */
 471 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
 472 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
 473 #define UMUL_TIME 40
 474 #define UDIV_TIME 40
 475 #endif /* x86_64 */
 476
 477 #if defined (__i960__) && W_TYPE_SIZE == 32
     /* i960 with a 32-bit word.  Both macros are GNU statement expressions
        around the emul instruction, which yields the full 64-bit product.
        umul_ppmm splits it through a union whose first member is the low
        word (__l) and second the high word (__h); __umulsidi3 returns the
        UDItype product as the value of the expression.  */
 478 #define umul_ppmm(w1, w0, u, v) \
 479   ({union {UDItype __ll;                                                \
 480            struct {USItype __l, __h;} __i;                              \
 481           } __xx;                                                       \
 482   __asm__ ("emul        %2,%1,%0"                                       \
 483            : "=d" (__xx.__ll)                                           \
 484            : "%dI" ((USItype) (u)),                                     \
 485              "dI" ((USItype) (v)));                                     \
 486   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 487 #define __umulsidi3(u, v) \
 488   ({UDItype __w;                                                        \
 489     __asm__ ("emul      %2,%1,%0"                                       \
 490              : "=d" (__w)                                               \
 491              : "%dI" ((USItype) (u)),                                   \
 492                "dI" ((USItype) (v)));                                   \
 493     __w; })
 494 #endif /* __i960__ */
 495
 496 #if defined (__ia64) && W_TYPE_SIZE == 64
 497 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
 498    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
 499    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
 500    register, which takes an extra cycle.  */
 501 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
 502   do {                                                                  \
 503     UWtype __x;                                                         \
 504     __x = (al) - (bl);                                                  \
 505     if ((al) < (bl))                                                    \
 506       (sh) = (ah) - (bh) - 1;                                           \
 507     else                                                                \
 508       (sh) = (ah) - (bh);                                               \
 509     (sl) = __x;                                                         \
 510   } while (0)
 511
 512 /* Do both product parts in assembly, since that gives better code with
 513    all gcc versions.  Some callers will just use the upper part, and in
 514    that situation we waste an instruction, but not any cycles.  */
     /* xma.hu produces the high product word and xma.l the low one, each
        with f0 as the addend.  All operands use the "f" constraint; PH is
        an earlyclobber output because it is written before the second
        instruction reads the inputs.  */
 515 #define umul_ppmm(ph, pl, m0, m1)                                       \
 516   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
 517            : "=&f" (ph), "=f" (pl)                                      \
 518            : "f" (m0), "f" (m1))
     /* count_leading_zeros: two instructions determine a byte index _a
        (NOTE(review): presumably mux1 @rev byte-reverses X and czx1.l
        locates a zero byte of "-y | y" -- confirm against the Itanium
        instruction reference).  X is then shifted down by 8 * (_a - 1)
        bits and the remaining steps of 4, 2 and 1 finish the search within
        that byte.  _c ends up as the index of the highest set bit, and the
        result is W_TYPE_SIZE - 1 - _c.  */
 519 #define count_leading_zeros(count, x)                                   \
 520   do {                                                                  \
 521     UWtype _x = (x), _y, _a, _c;                                        \
 522     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
 523     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
 524     _c = (_a - 1) << 3;                                                 \
 525     _x >>= _c;                                                          \
 526     if (_x >= 1 << 4)                                                   \
 527       _x >>= 4, _c += 4;                                                \
 528     if (_x >= 1 << 2)                                                   \
 529       _x >>= 2, _c += 2;                                                \
 530     _c += _x >> 1;                                                      \
 531     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
 532   } while (0)
 533 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
 534    based, and we don't need a special case for x==0 here */
     /* (x - 1) & ~x has ones in exactly the trailing-zero positions of x,
        so its population count is the number of trailing zeros.  X is
        copied into a local first and therefore evaluated once.  */
 535 #define count_trailing_zeros(count, x)                                  \
 536   do {                                                                  \
 537     UWtype __ctz_x = (x);                                               \
 538     __asm__ ("popcnt %0 = %1"                                           \
 539              : "=r" (count)                                             \
 540              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
 541   } while (0)
 542 #define UMUL_TIME 14
 543 #endif
 544
 545 #if defined (__M32R__) && W_TYPE_SIZE == 32
     /* M32R with a 32-bit word.  Both macros work in place: AH and AL are
        tied to the outputs ("0" / "1" constraints), a cmp of a register
        with itself clears the condition bit, and two addx / subx
        instructions then process the low and the high words.  The
        condition bit is declared clobbered as "cbit".  */
 546 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 547   /* The cmp clears the condition bit.  */ \
 548   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
 549            : "=r" ((USItype) (sh)),                                     \
 550              "=&r" ((USItype) (sl))                                     \
 551            : "0" ((USItype) (ah)),                                      \
 552              "r" ((USItype) (bh)),                                      \
 553              "1" ((USItype) (al)),                                      \
 554              "r" ((USItype) (bl))                                       \
 555            : "cbit")
 556 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 557   /* The cmp clears the condition bit.  */ \
 558   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
 559            : "=r" ((USItype) (sh)),                                     \
 560              "=&r" ((USItype) (sl))                                     \
 561            : "0" ((USItype) (ah)),                                      \
 562              "r" ((USItype) (bh)),                                      \
 563              "1" ((USItype) (al)),                                      \
 564              "r" ((USItype) (bl))                                       \
 565            : "cbit")
 566 #endif /* __M32R__ */
 567
 568 #if defined (__mc68000__) && W_TYPE_SIZE == 32
 /* Two-word add for m68k: (sh,sl) = (ah,al) + (bh,bl).  add.l first adds
    bl into sl (tied to al), then addx.l adds bh into sh (tied to ah).
    The "%" on the tied inputs marks them as commutative with the operand
    that follows.  sl is earlyclobber because it is written before bh is
    read.  */
 569 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 570   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 571            : "=d" ((USItype) (sh)),                                     \
 572              "=&d" ((USItype) (sl))                                     \
 573            : "%0" ((USItype) (ah)),                                     \
 574              "d" ((USItype) (bh)),                                      \
 575              "%1" ((USItype) (al)),                                     \
 576              "g" ((USItype) (bl)))
 /* Two-word subtract for m68k: (sh,sl) = (ah,al) - (bh,bl), using sub.l on
    the low words and subx.l on the high words.  The tied inputs carry no
    "%" here.  */
 577 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 578   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 579            : "=d" ((USItype) (sh)),                                     \
 580              "=&d" ((USItype) (sl))                                     \
 581            : "0" ((USItype) (ah)),                                      \
 582              "d" ((USItype) (bh)),                                      \
 583              "1" ((USItype) (al)),                                      \
 584              "g" ((USItype) (bl)))
 585
 586 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
 587 #if (defined (__mc68020__) && !defined (__mc68060__))
 /* umul_ppmm (w1, w0, u, v): unsigned multiply using mulu.l with the
    register pair %1:%0 as destination.  w0 is tied to u, so the
    instruction multiplies that register by v; w1 is the other half of the
    pair (the high word, going by the w1/w0 naming).  v may be a data
    register, memory or an immediate ("dmi").  */
 588 #define umul_ppmm(w1, w0, u, v) \
 589   __asm__ ("mulu%.l %3,%1:%0"                                           \
 590            : "=d" ((USItype) (w0)),                                     \
 591              "=d" ((USItype) (w1))                                      \
 592            : "%0" ((USItype) (u)),                                      \
 593              "dmi" ((USItype) (v)))
 594 #define UMUL_TIME 45
 /* udiv_qrnnd (q, r, n1, n0, d): unsigned divide using divu.l on the
    register pair %1:%0.  On entry q's register holds n0 and r's register
    holds n1 (matching constraints "0" and "1"); the divisor d may be a
    data register, memory or an immediate.  NOTE(review): presumably the
    quotient is left in q and the remainder in r, as the names suggest --
    confirm against the 68020 manual.  */
 595 #define udiv_qrnnd(q, r, n1, n0, d) \
 596   __asm__ ("divu%.l %4,%1:%0"                                           \
 597            : "=d" ((USItype) (q)),                                      \
 598              "=d" ((USItype) (r))                                       \
 599            : "0" ((USItype) (n0)),                                      \
 600              "1" ((USItype) (n1)),                                      \
 601              "dmi" ((USItype) (d)))
 602 #define UDIV_TIME 90
 /* sdiv_qrnnd: same operand layout as udiv_qrnnd, but using the signed
    divs.l instruction.  */
 603 #define sdiv_qrnnd(q, r, n1, n0, d) \
 604   __asm__ ("divs%.l %4,%1:%0"                                           \
 605            : "=d" ((USItype) (q)),                                      \
 606              "=d" ((USItype) (r))                                       \
 607            : "0" ((USItype) (n0)),                                      \
 608              "1" ((USItype) (n1)),                                      \
 609              "dmi" ((USItype) (d)))
 610
 611 #elif defined (__mcoldfire__) /* not mc68020 */
 612
 /* umul_ppmm (xh, xl, a, b) for ColdFire: 32x32->64 multiply built from
    four 16x16 mulu partial products, done entirely in the fixed registers
    d0-d4 (all listed as clobbered).  a is loaded into d0 and b into d1;
    swap exposes the upper halves.  The two middle products are added into
    d2, and a carry out of that sum is propagated by adding 65536 to the
    upper product in d1.  The low word is stored to xl (%1) and the high
    word to xh (%0).  The sequence is the same as the non-ColdFire variant
    in the following #else arm except that clr.w is used where that one
    uses eor.w.  */
 613 #define umul_ppmm(xh, xl, a, b) \
 614   __asm__ ("| Inlined umul_ppmm\n"                                      \
 615            "    move%.l %2,%/d0\n"                                      \
 616            "    move%.l %3,%/d1\n"                                      \
 617            "    move%.l %/d0,%/d2\n"                                    \
 618            "    swap    %/d0\n"                                         \
 619            "    move%.l %/d1,%/d3\n"                                    \
 620            "    swap    %/d1\n"                                         \
 621            "    move%.w %/d2,%/d4\n"                                    \
 622            "    mulu    %/d3,%/d4\n"                                    \
 623            "    mulu    %/d1,%/d2\n"                                    \
 624            "    mulu    %/d0,%/d3\n"                                    \
 625            "    mulu    %/d0,%/d1\n"                                    \
 626            "    move%.l %/d4,%/d0\n"                                    \
 627            "    clr%.w  %/d0\n"                                         \
 628            "    swap    %/d0\n"                                         \
 629            "    add%.l  %/d0,%/d2\n"                                    \
 630            "    add%.l  %/d3,%/d2\n"                                    \
 631            "    jcc     1f\n"                                           \
 632            "    add%.l  %#65536,%/d1\n"                                 \
 633            "1:  swap    %/d2\n"                                         \
 634            "    moveq   %#0,%/d0\n"                                     \
 635            "    move%.w %/d2,%/d0\n"                                    \
 636            "    move%.w %/d4,%/d2\n"                                    \
 637            "    move%.l %/d2,%1\n"                                      \
 638            "    add%.l  %/d1,%/d0\n"                                    \
 639            "    move%.l %/d0,%0"                                        \
 640            : "=g" ((USItype) (xh)),                                     \
 641              "=g" ((USItype) (xl))                                      \
 642            : "g" ((USItype) (a)),                                       \
 643              "g" ((USItype) (b))                                        \
 644            : "d0", "d1", "d2", "d3", "d4")
 645 #define UMUL_TIME 100
 646 #define UDIV_TIME 400
 647 #else /* not ColdFire */
 648 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
 /* umul_ppmm (xh, xl, a, b) for plain m68000: 32x32->64 multiply built
    from four 16x16 mulu partial products in the fixed registers d0-d4
    (all listed as clobbered).  a goes to d0 and b to d1; the middle
    products are summed in d2, with a carry out of that sum propagated by
    adding 65536 to d1.  The low word is stored to xl (%1) and the high
    word to xh (%0).  */
 649 #define umul_ppmm(xh, xl, a, b) \
 650   __asm__ ("| Inlined umul_ppmm\n"                                      \
 651            "    move%.l %2,%/d0\n"                                      \
 652            "    move%.l %3,%/d1\n"                                      \
 653            "    move%.l %/d0,%/d2\n"                                    \
 654            "    swap    %/d0\n"                                         \
 655            "    move%.l %/d1,%/d3\n"                                    \
 656            "    swap    %/d1\n"                                         \
 657            "    move%.w %/d2,%/d4\n"                                    \
 658            "    mulu    %/d3,%/d4\n"                                    \
 659            "    mulu    %/d1,%/d2\n"                                    \
 660            "    mulu    %/d0,%/d3\n"                                    \
 661            "    mulu    %/d0,%/d1\n"                                    \
 662            "    move%.l %/d4,%/d0\n"                                    \
 663            "    eor%.w  %/d0,%/d0\n"                                    \
 664            "    swap    %/d0\n"                                         \
 665            "    add%.l  %/d0,%/d2\n"                                    \
 666            "    add%.l  %/d3,%/d2\n"                                    \
 667            "    jcc     1f\n"                                           \
 668            "    add%.l  %#65536,%/d1\n"                                 \
 669            "1:  swap    %/d2\n"                                         \
 670            "    moveq   %#0,%/d0\n"                                     \
 671            "    move%.w %/d2,%/d0\n"                                    \
 672            "    move%.w %/d4,%/d2\n"                                    \
 673            "    move%.l %/d2,%1\n"                                      \
 674            "    add%.l  %/d1,%/d0\n"                                    \
 675            "    move%.l %/d0,%0"                                        \
 676            : "=g" ((USItype) (xh)),                                     \
 677              "=g" ((USItype) (xl))                                      \
 678            : "g" ((USItype) (a)),                                       \
 679              "g" ((USItype) (b))                                        \
 680            : "d0", "d1", "d2", "d3", "d4")
 681 #define UMUL_TIME 100
 682 #define UDIV_TIME 400
 683
 684 #endif /* not mc68020 */
 685
 686 /* The '020, '030, '040 and '060 have bitfield insns.
 687    cpu32 disguises as a 68020, but lacks them.  */
 688 #if defined (__mc68020__) && !defined (__mcpu32__)
 /* bfffo is given the constant 0 (operand 2) as both the bit-field offset
    and the width.  NOTE(review): presumably a width of 0 selects the full
    32 bits -- confirm against the 68020 manual.  COUNT_LEADING_ZEROS_0 is
    not defined in this branch.  */
 689 #define count_leading_zeros(count, x) \
 690   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 691            : "=d" ((USItype) (count))                                   \
 692            : "od" ((USItype) (x)), "n" (0))
 693 /* Some ColdFire architectures have a ff1 instruction supported via
 694    __builtin_clz. */
 695 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 /* Here the compiler builtin is used directly, and the result for a zero
    argument is declared to be 32.  */
 696 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 697 #define COUNT_LEADING_ZEROS_0 32
 698 #endif
 699 #endif /* mc68000 */
 700
 701 #if defined (__m88000__) && W_TYPE_SIZE == 32
 /* Two-word add for m88k: addu.co adds the low words al + bl into sl,
    then addu.ci adds the high words ah + bh into sh (the .co/.ci suffixes
    are presumably carry-out / carry-in -- confirm against the m88k
    manual).  All inputs use "rJ" with the %r operand modifier;
    NOTE(review): this presumably lets a constant zero be emitted as r0.
    sl is earlyclobber because it is written before the high-word inputs
    are read.  */
 702 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 703   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 704            : "=r" ((USItype) (sh)),                                     \
 705              "=&r" ((USItype) (sl))                                     \
 706            : "%rJ" ((USItype) (ah)),                                    \
 707              "rJ" ((USItype) (bh)),                                     \
 708              "%rJ" ((USItype) (al)),                                    \
 709              "rJ" ((USItype) (bl)))
 /* Two-word subtract for m88k, with the same operand layout: subu.co on
    the low words, then subu.ci on the high words.  The inputs are not
    marked commutative here.  */
 710 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 711   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 712            : "=r" ((USItype) (sh)),                                     \
 713              "=&r" ((USItype) (sl))                                     \
 714            : "rJ" ((USItype) (ah)),                                     \
 715              "rJ" ((USItype) (bh)),                                     \
 716              "rJ" ((USItype) (al)),                                     \
 717              "rJ" ((USItype) (bl)))
 /* count_leading_zeros for m88k: the ff1 result is XORed with 31, which
    maps a bit index in 0..31 to 31 - index.  COUNT_LEADING_ZEROS_0 is 63:
    NOTE(review): presumably ff1 yields 32 for a zero input, and
    32 ^ 31 == 63, which would explain the "sic".  */
 718 #define count_leading_zeros(count, x) \
 719   do {                                                                  \
 720     USItype __cbtmp;                                                    \
 721     __asm__ ("ff1 %0,%1"                                                \
 722              : "=r" (__cbtmp)                                           \
 723              : "r" ((USItype) (x)));                                    \
 724     (count) = __cbtmp ^ 31;                                             \
 725   } while (0)
 726 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 727 #if defined (__mc88110__)
 /* umul_ppmm for the 88110: mulu.d writes a double-word result into
    __xx.__ll, which is then split through the union.  The struct lists
    __h before __l, so the high word is taken from the first word of the
    double-word object.  */
 728 #define umul_ppmm(wh, wl, u, v) \
 729   do {                                                                  \
 730     union {UDItype __ll;                                                \
 731            struct {USItype __h, __l;} __i;                              \
 732           } __xx;                                                       \
 733     __asm__ ("mulu.d    %0,%1,%2"                                       \
 734              : "=r" (__xx.__ll)                                         \
 735              : "r" ((USItype) (u)),                                     \
 736                "r" ((USItype) (v)));                                    \
 737     (wh) = __xx.__i.__h;                                                \
 738     (wl) = __xx.__i.__l;                                                \
 739   } while (0)
 /* udiv_qrnnd (q, r, n1, n0, d) for the 88110: the two-word numerator is
    assembled in a union (high word first) and divided by d with divu.d,
    which produces only the quotient.  The remainder is then recovered as
    n0 - q * d in single-word (modular) arithmetic.  n0 and d are captured
    once (in __xx.__i.__l and __d) so that every macro argument is
    evaluated exactly once, even if it has side effects.  */
 740 #define udiv_qrnnd(q, r, n1, n0, d) \
 741   ({union {UDItype __ll;                                                \
 742            struct {USItype __h, __l;} __i;                              \
 743           } __xx;                                                       \
 744   USItype __q, __d;                                                     \
 745   __xx.__i.__h = (n1); __xx.__i.__l = (n0); __d = (d);                  \
 746   __asm__ ("divu.d %0,%1,%2"                                            \
 747            : "=r" (__q)                                                 \
 748            : "r" (__xx.__ll),                                           \
 749              "r" (__d));                                                \
 750   (r) = __xx.__i.__l - __q * __d; (q) = __q; })
 751 #define UMUL_TIME 5
 752 #define UDIV_TIME 25
 753 #else
 754 #define UMUL_TIME 17
 755 #define UDIV_TIME 150
 756 #endif /* __mc88110__ */
 757 #endif /* __m88000__ */
 758
 759 #if defined (__mn10300__)
 /* Widening multiplies for mn10300.  On AM33 the four-operand mulu/mul
    form writes both result words to general registers (w1 and w0), and
    __builtin_clz supplies count_leading_zeros.  Otherwise the two-operand
    form is used: w0 is tied to u and receives one result word, and w1 is
    read from the register selected by the "z" constraint (presumably the
    MDR register -- confirm against the mn10300 back end).  __asm__ is
    used instead of asm so the macros still compile when the plain asm
    keyword is disabled (-ansi, -std=c99), matching the rest of this
    file.  */
 760 # if defined (__AM33__)
 761 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 762 #  define umul_ppmm(w1, w0, u, v)               \
 763     __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 764 #  define smul_ppmm(w1, w0, u, v)               \
 765     __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 766 # else
 767 #  define umul_ppmm(w1, w0, u, v)               \
 768     __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 769 #  define smul_ppmm(w1, w0, u, v)               \
 770     __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 771 # endif
 /* Two-word add in plain C: pack both operands into DWunion objects, add
    them as double words and unpack the sum into sl / sh.  */
 772 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
 773   do {                                          \
 774     DWunion __acc, __addend;                    \
 775     __acc.s.low = (al); __acc.s.high = (ah);    \
 776     __addend.s.low = (bl); __addend.s.high = (bh); \
 777     __acc.ll += __addend.ll;                    \
 778     (sl) = __acc.s.low; (sh) = __acc.s.high;    \
 779   } while (0)
 /* Two-word subtract in plain C: pack both operands into DWunion objects,
    subtract them as double words and unpack the difference into sl / sh.  */
 780 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
 781   do {                                          \
 782     DWunion __acc, __subtrahend;                \
 783     __acc.s.low = (al); __acc.s.high = (ah);    \
 784     __subtrahend.s.low = (bl); __subtrahend.s.high = (bh); \
 785     __acc.ll -= __subtrahend.ll;                \
 786     (sl) = __acc.s.low; (sh) = __acc.s.high;    \
 787   } while (0)
 /* Two-word by one-word divides for mn10300.  q's register is preloaded
    with nl ("0") and r's register with nh ("1"); divu/div then divides by
    d.  r lives in the register selected by the "z" constraint (presumably
    MDR -- confirm against the mn10300 back end).  __asm__ is used instead
    of asm so the macros still compile when the plain asm keyword is
    disabled (-ansi, -std=c99), matching the rest of this file.  */
 788 # define udiv_qrnnd(q, r, nh, nl, d)            \
 789   __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 790 # define sdiv_qrnnd(q, r, nh, nl, d)            \
 791   __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 792 # define UMUL_TIME 3
 793 # define UDIV_TIME 38
 794 #endif
 795
 796 #if defined (__mips__) && W_TYPE_SIZE == 32
 /* umul_ppmm (w1, w0, u, v): 32x32->64 unsigned multiply written in C.
    Both operands are truncated to USItype, multiplied as UDItype, and the
    product is split into its upper (w1) and lower (w0) words.  This
    section is only compiled when W_TYPE_SIZE is 32.  */
 797 #define umul_ppmm(w1, w0, u, v)                                         \
 798   do {                                                                  \
 799     const UDItype __prod = (USItype) (u) * (UDItype) (USItype) (v);     \
 800     (w1) = (USItype) (__prod >> W_TYPE_SIZE);                           \
 801     (w0) = (USItype) (__prod & 0xffffffffu);                            \
 802   } while (0)
 803 #define UMUL_TIME 10
 804 #define UDIV_TIME 100
 805
 /* For MIPS32/MIPS64 (and not MIPS16), count_leading_zeros is delegated
    to __builtin_clz, and the result for a zero argument is declared to
    be 32.  */
 806 #if (__mips == 32 || __mips == 64) && ! __mips16
 807 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 808 #define COUNT_LEADING_ZEROS_0 32
 809 #endif
 810 #endif /* __mips__ */
 811
 812 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 /* umul_ppmm for ns32000: meid multiplies the double-word operand %0,
    whose input is tied to u ("%0"), by v.  The result is split through
    the union; the struct lists __l before __h, so the low word is the
    first word of the double-word object.  */
 813 #define umul_ppmm(w1, w0, u, v) \
 814   ({union {UDItype __ll;                                                \
 815            struct {USItype __l, __h;} __i;                              \
 816           } __xx;                                                       \
 817   __asm__ ("meid %2,%0"                                                 \
 818            : "=g" (__xx.__ll)                                           \
 819            : "%0" ((USItype) (u)),                                      \
 820              "g" ((USItype) (v)));                                      \
 821   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 /* __umulsidi3 (u, v): the same meid multiply, but the statement
    expression yields the whole double-word product as a UDItype.  */
 822 #define __umulsidi3(u, v) \
 823   ({UDItype __w;                                                        \
 824     __asm__ ("meid %2,%0"                                               \
 825              : "=g" (__w)                                               \
 826              : "%0" ((USItype) (u)),                                    \
 827                "g" ((USItype) (v)));                                    \
 828     __w; })
 /* udiv_qrnnd for ns32000: the numerator (n1 high, n0 low) is assembled
    in the union and passed to deid as an in/out double-word operand
    together with the divisor d.  Afterwards the low word of the result is
    stored to r and the high word to q.  */
 829 #define udiv_qrnnd(q, r, n1, n0, d) \
 830   ({union {UDItype __ll;                                                \
 831            struct {USItype __l, __h;} __i;                              \
 832           } __xx;                                                       \
 833   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 834   __asm__ ("deid %2,%0"                                                 \
 835            : "=g" (__xx.__ll)                                           \
 836            : "0" (__xx.__ll),                                           \
 837              "g" ((USItype) (d)));                                      \
 838   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 /* count_trailing_zeros for ns32000: count's register is preloaded with 0
    through the matching constraint "0" and then updated by ffsd, which
    scans x.  NOTE(review): presumably the preloaded value is the bit
    offset at which ffsd starts searching -- confirm against the ns32000
    manual.  */
 839 #define count_trailing_zeros(count,x) \
 840   do {                                                                  \
 841     __asm__ ("ffsd     %2,%0"                                           \
 842             : "=r" ((USItype) (count))                                  \
 843             : "0" ((USItype) 0),                                        \
 844               "r" ((USItype) (x)));                                     \
 845   } while (0)
 846 #endif /* __ns32000__ */
 847
 848 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 849    system vendor compilers.
 850    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 851    enough, since that hits ARM and m68k too.  */
 852 #if (defined (_ARCH_PPC)        /* AIX */                               \
 853      || defined (__powerpc__)   /* gcc */                               \
 854      || defined (__POWERPC__)   /* BEOS */                              \
 855      || defined (__ppc__)       /* Darwin */                            \
 856      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 857      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 858          && CPU_FAMILY == PPC)                                                \
 859      ) && W_TYPE_SIZE == 32
 /* Two-word add and subtract for 32-bit PowerPC.  Each macro selects an
    asm variant at compile time with __builtin_constant_p: when a high-word
    operand is the constant 0 or all-ones, the carry from the low words is
    folded into the remaining high word with a single addze/addme (or
    subfze/subfme) and the constant needs no register; otherwise the
    general adde/subfe form is used.  The low words are always combined
    first.  The %I modifier on the "rI" operand presumably selects the
    immediate form of the opcode when that operand is a constant --
    confirm against the rs6000 back end.  sl is earlyclobber because it is
    written before the high-word inputs are read.  */
 860 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 861   do {                                                                  \
 862     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 863       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
 864              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 865     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 866       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
 867              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 868     else                                                                \
 869       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
 870              : "=r" (sh), "=&r" (sl)                                    \
 871              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 872   } while (0)
 873 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 874   do {                                                                  \
 875     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 876       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
 877                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 878     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 879       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
 880                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 881     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 882       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
 883                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 884     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 885       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
 886                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 887     else                                                                \
 888       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
 889                : "=r" (sh), "=&r" (sl)                                  \
 890                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 891   } while (0)
 /* count_leading_zeros for 32-bit PowerPC: a single cntlzw instruction.
    The result for a zero argument is declared to be 32.  */
 892 #define count_leading_zeros(count, x) \
 893   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
 894 #define COUNT_LEADING_ZEROS_0 32
 895 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 896   || defined (__ppc__)                                                    \
 897   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 898   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 899          && CPU_FAMILY == PPC)
 /* umul_ppmm (ph, pl, m0, m1): unsigned 32x32->64 multiply.  mulhwu
    supplies the high word and an ordinary C multiply the low word.  The
    asm reads the local copies __m0/__m1 rather than the raw macro
    arguments, so each argument is evaluated exactly once and is converted
    to USItype before it reaches the instruction.  */
 900 #define umul_ppmm(ph, pl, m0, m1) \
 901   do {                                                                  \
 902     USItype __m0 = (m0), __m1 = (m1);                                   \
 903     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 904     (pl) = __m0 * __m1;                                                 \
 905   } while (0)
 906 #define UMUL_TIME 15
 /* smul_ppmm (ph, pl, m0, m1): signed counterpart using mulhw.  The low
    word is computed in unsigned arithmetic and converted back to SItype,
    which gives the same bits as the signed product without relying on
    signed overflow.  */
 907 #define smul_ppmm(ph, pl, m0, m1) \
 908   do {                                                                  \
 909     SItype __m0 = (m0), __m1 = (m1);                                    \
 910     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 911     (pl) = (SItype) ((USItype) __m0 * (USItype) __m1);                  \
 912   } while (0)
 913 #define SMUL_TIME 14
 914 #define UDIV_TIME 120
 915 #endif
 916 #endif /* 32-bit POWER architecture variants.  */
 917
 918 /* We should test _IBMR2 here when we add assembly support for the system
 919    vendor compilers.  */
 920 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 /* Two-word add and subtract for 64-bit PowerPC.  Each macro selects an
    asm variant at compile time with __builtin_constant_p: when a high-word
    operand is the constant 0 or all-ones (~(UDItype) 0), the carry from
    the low words is folded into the remaining high word with a single
    addze/addme (or subfze/subfme); otherwise the general adde/subfe form
    is used.  The low words are always combined first.  The %I modifier on
    the "rI" operand presumably selects the immediate form of the opcode
    when that operand is a constant -- confirm against the rs6000 back
    end.  sl is earlyclobber because it is written before the high-word
    inputs are read.  */
 921 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 922   do {                                                                  \
 923     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 924       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                \
 925              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 926     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 927       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                \
 928              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 929     else                                                                \
 930       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"              \
 931              : "=r" (sh), "=&r" (sl)                                    \
 932              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 933   } while (0)
 934 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 935   do {                                                                  \
 936     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 937       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"      \
 938                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 939     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 940       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"      \
 941                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 942     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 943       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"               \
 944                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 945     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 946       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"               \
 947                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 948     else                                                                \
 949       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"    \
 950                : "=r" (sh), "=&r" (sl)                                  \
 951                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 952   } while (0)
 /* count_leading_zeros for 64-bit PowerPC: a single cntlzd instruction.
    The result for a zero argument is declared to be 64.  */
 953 #define count_leading_zeros(count, x) \
 954   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 955 #define COUNT_LEADING_ZEROS_0 64
 /* umul_ppmm (ph, pl, m0, m1): unsigned 64x64->128 multiply.  mulhdu
    supplies the high word and an ordinary C multiply the low word.  The
    asm reads the local copies __m0/__m1 rather than the raw macro
    arguments, so each argument is evaluated exactly once and is converted
    to UDItype before it reaches the instruction.  */
 956 #define umul_ppmm(ph, pl, m0, m1) \
 957   do {                                                                  \
 958     UDItype __m0 = (m0), __m1 = (m1);                                   \
 959     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 960     (pl) = __m0 * __m1;                                                 \
 961   } while (0)
 962 #define UMUL_TIME 15
 /* smul_ppmm (ph, pl, m0, m1): signed counterpart using mulhd.  The low
    word is computed in unsigned arithmetic and converted back to DItype,
    which gives the same bits as the signed product without relying on
    signed overflow.  */
 963 #define smul_ppmm(ph, pl, m0, m1) \
 964   do {                                                                  \
 965     DItype __m0 = (m0), __m1 = (m1);                                    \
 966     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 967     (pl) = (DItype) ((UDItype) __m0 * (UDItype) __m1);                  \
 968   } while (0)
 969 #define SMUL_TIME 14  /* ??? */
 970 #define UDIV_TIME 120 /* ??? */
 971 #endif /* 64-bit PowerPC.  */
 972
 973 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 /* Two-word add for the IBM RT (ROMP): "a" adds bl into sl (tied to al),
    then "ae" adds bh into sh (tied to ah); presumably "ae" also adds the
    carry from the first instruction -- confirm against the ROMP manual.
    sl is earlyclobber because it is written before bh is read.  */
 974 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 975   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 976            : "=r" ((USItype) (sh)),                                     \
 977              "=&r" ((USItype) (sl))                                     \
 978            : "%0" ((USItype) (ah)),                                     \
 979              "r" ((USItype) (bh)),                                      \
 980              "%1" ((USItype) (al)),                                     \
 981              "r" ((USItype) (bl)))
 /* Two-word subtract with the same operand layout, using "s" on the low
    words and "se" on the high words.  */
 982 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 983   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 984            : "=r" ((USItype) (sh)),                                     \
 985              "=&r" ((USItype) (sl))                                     \
 986            : "0" ((USItype) (ah)),                                      \
 987              "r" ((USItype) (bh)),                                      \
 988              "1" ((USItype) (al)),                                      \
 989              "r" ((USItype) (bl)))
 /* umul_ppmm for the IBM RT (ROMP).  r2 is cleared, __m0 is moved to r10
    with mts, and sixteen "m r2,%3" multiply-step instructions are issued
    against __m1; the result is then read from r2 (into ph) and from r10
    (into pl).  r2 is listed as clobbered.  The C statement after the asm
    adds __m1 to ph when __m0 has its top bit set, and __m0 when __m1 has
    its top bit set; NOTE(review): presumably this corrects a signed
    multiply-step result to the unsigned product -- confirm against the
    ROMP manual.  */
 990 #define umul_ppmm(ph, pl, m0, m1) \
 991   do {                                                                  \
 992     USItype __m0 = (m0), __m1 = (m1);                                   \
 993     __asm__ (                                                           \
 994        "s       r2,r2\n"                                                \
 995 "       mts     r10,%2\n"                                               \
 996 "       m       r2,%3\n"                                                \
 997 "       m       r2,%3\n"                                                \
 998 "       m       r2,%3\n"                                                \
 999 "       m       r2,%3\n"                                                \
1000 "       m       r2,%3\n"                                                \
1001 "       m       r2,%3\n"                                                \
1002 "       m       r2,%3\n"                                                \
1003 "       m       r2,%3\n"                                                \
1004 "       m       r2,%3\n"                                                \
1005 "       m       r2,%3\n"                                                \
1006 "       m       r2,%3\n"                                                \
1007 "       m       r2,%3\n"                                                \
1008 "       m       r2,%3\n"                                                \
1009 "       m       r2,%3\n"                                                \
1010 "       m       r2,%3\n"                                                \
1011 "       m       r2,%3\n"                                                \
1012 "       cas     %0,r2,r0\n"                                             \
1013 "       mfs     r10,%1"                                                 \
1014              : "=r" ((USItype) (ph)),                                   \
1015                "=r" ((USItype) (pl))                                    \
1016              : "%r" (__m0),                                             \
1017                 "r" (__m1)                                              \
1018              : "r2");                                                   \
1019     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1020              + (((SItype) __m1 >> 31) & __m0));                         \
1021   } while (0)
1022 #define UMUL_TIME 20
1023 #define UDIV_TIME 200
/* count_leading_zeros for the IBM RT (ROMP).  When x is at least 0x10000
   the clz instruction is applied to x >> 16; otherwise it is applied to x
   itself and 16 is added to the result.  NOTE(review): this suggests clz
   only examines the low 16 bits of its operand -- confirm against the
   ROMP manual.  Note that x is expanded both in the test and in the asm
   operand, so it is evaluated more than once.  */
1024 #define count_leading_zeros(count, x) \
1025   do {                                                                  \
1026     if ((x) >= 0x10000)                                                 \
1027       __asm__ ("clz     %0,%1"                                          \
1028                : "=r" ((USItype) (count))                               \
1029                : "r" ((USItype) (x) >> 16));                            \
1030     else                                                                \
1031       {                                                                 \
1032         __asm__ ("clz   %0,%1"                                          \
1033                  : "=r" ((USItype) (count))                             \
1034                  : "r" ((USItype) (x)));                                        \
1035         (count) += 16;                                                  \
1036       }                                                                 \
1037   } while (0)
1038 #endif
1039
1040 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1041 #ifndef __sh1__
/* umul_ppmm for SH (not SH1): dmulu.l multiplies u by v, and the two sts
   instructions copy macl into w0 and mach into w1.  Both macl and mach
   are listed as clobbered.  NOTE(review): the "<" in the output
   constraints together with the %M modifier presumably allows a
   pre-decrement memory destination -- confirm against the SH back end.  */
1042 #define umul_ppmm(w1, w0, u, v) \
1043   __asm__ (                                                             \
1044        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1045            : "=r<" ((USItype)(w1)),                                     \
1046              "=r<" ((USItype)(w0))                                      \
1047            : "r" ((USItype)(u)),                                        \
1048              "r" ((USItype)(v))                                         \
1049            : "macl", "mach")
1050 #define UMUL_TIME 5
1051 #endif
1052
1053 /* This is the same algorithm as __udiv_qrnnd_c.  */
1054 #define UDIV_NEEDS_NORMALIZATION 1
1055 
/* udiv_qrnnd for SH: the division is performed by two calls (jsr @%5) to
   the hidden-visibility helper __udiv_qrnnd_16, whose address is passed
   in as operand 5.  Operands are handed over in fixed registers, as the
   register-usage comment inside the macro records.  r is tied to n1 and
   lives in the register selected by the "z" constraint; q is assembled
   at the end from r1 with swap.w and or.  r1, r2, r4, r5, r6, pr and t
   are listed as clobbered.  NOTE(review): the instruction following each
   jsr presumably executes in the branch delay slot -- confirm against the
   SH manual.  */
1056 #define udiv_qrnnd(q, r, n1, n0, d) \
1057   do {                                                                  \
1058     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1059                         __attribute__ ((visibility ("hidden")));        \
1060     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1061     __asm__ (                                                           \
1062         "mov%M4 %4,r5\n"                                                \
1063 "       swap.w %3,r4\n"                                                 \
1064 "       swap.w r5,r6\n"                                                 \
1065 "       jsr @%5\n"                                                      \
1066 "       shll16 r6\n"                                                    \
1067 "       swap.w r4,r4\n"                                                 \
1068 "       jsr @%5\n"                                                      \
1069 "       swap.w r1,%0\n"                                                 \
1070 "       or r1,%0"                                                       \
1071         : "=r" (q), "=&z" (r)                                           \
1072         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1073         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1074   } while (0)
1075 
1076 #define UDIV_TIME 80
1077
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  `clrt' clears the T bit, the first `subc'
   subtracts the low words and the second one subtracts the high words
   together with the borrow left in T.
   SL is written by the first `subc' while BH is still to be read by the
   second, so SL is marked earlyclobber ("=&r"); otherwise the compiler
   could place BH in SL's register whenever AL and BH hold the same
   value.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
           : "=r" (sh), "=&r" (sl)                                      \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1082
1083 #endif /* __sh__ */
1084
1085 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* __umulsidi3 (u, v): full 32x32->64 unsigned product of U and V.
   Each argument is parenthesized before the cast so that expression
   arguments such as `a + b' are converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros (count, x): X is converted to USItype and then
   zero-extended into the 64-bit temporary x_; the `nsb' instruction is run
   on x_ and COUNT receives its result minus 31.  COUNT_LEADING_ZEROS_0
   records that the macro yields 32 for X == 0.  X is evaluated once.  */
1087 #define count_leading_zeros(count, x) \
1088   do                                                                    \
1089     {                                                                   \
1090       UDItype x_ = (USItype)(x);                                        \
1091       SItype c_;                                                        \
1092                                                                         \
1093       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1094       (count) = c_ - 31;                                                \
1095     }                                                                   \
1096   while (0)
1097 #define COUNT_LEADING_ZEROS_0 32
1098 #endif
1099
1100 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1101     && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  `addcc' adds the low words (operands 4
   and 5) into SL, `addx' adds the high words (operands 2 and 3) into SH.
   The `%' modifiers mark each addend pair as commutative; SL is
   earlyclobber because it is written before the high words are read.  */
1102 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1103   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1104            : "=r" ((USItype) (sh)),                                     \
1105              "=&r" ((USItype) (sl))                                     \
1106            : "%rJ" ((USItype) (ah)),                                    \
1107              "rI" ((USItype) (bh)),                                     \
1108              "%rJ" ((USItype) (al)),                                    \
1109              "rI" ((USItype) (bl))                                      \
1110            __CLOBBER_CC)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  `subcc' subtracts the low words (operands
   4 and 5) into SL, `subx' subtracts the high words (operands 2 and 3)
   into SH.  Unlike the addition macro, no operand is marked commutative.
   SL is earlyclobber because it is written before the high words are
   read.  */
1111 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1112   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1113            : "=r" ((USItype) (sh)),                                     \
1114              "=&r" ((USItype) (sl))                                     \
1115            : "rJ" ((USItype) (ah)),                                     \
1116              "rI" ((USItype) (bh)),                                     \
1117              "rJ" ((USItype) (al)),                                     \
1118              "rI" ((USItype) (bl))                                      \
1119            __CLOBBER_CC)
1120 #if defined (__sparc_v9__)
/* umul_ppmm (w1, w0, u, v), __sparc_v9__ variant: `umul' multiplies U by V
   into the local register variable __g1 (pinned to %g1), `srlx' shifts
   that result right by 32 into W1, and W0 is then assigned from __g1.  */
1121 #define umul_ppmm(w1, w0, u, v) \
1122   do {                                                                  \
1123     register USItype __g1 asm ("g1");                                   \
1124     __asm__ ("umul\t%2,%3,%1\n\t"                                       \
1125              "srlx\t%1, 32, %0"                                         \
1126              : "=r" ((USItype) (w1)),                                   \
1127                "=r" (__g1)                                              \
1128              : "r" ((USItype) (u)),                                     \
1129                "r" ((USItype) (v)));                                    \
1130     (w0) = __g1;                                                        \
1131   } while (0)
/* udiv_qrnnd (__q, __r, __n1, __n0, __d), __sparc_v9__ variant: moves
   __n1 into %y, `udiv' divides by __d with __n0 as the register operand
   and writes __q, then the remainder is formed as __n0 - __q * __d
   (`umul' followed by `sub').  Both outputs are earlyclobber.  */
1132 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1133   __asm__ ("mov\t%2,%%y\n\t"                                            \
1134            "udiv\t%3,%4,%0\n\t"                                         \
1135            "umul\t%0,%4,%1\n\t"                                         \
1136            "sub\t%3,%1,%1"                                              \
1137            : "=&r" ((USItype) (__q)),                                   \
1138              "=&r" ((USItype) (__r))                                    \
1139            : "r" ((USItype) (__n1)),                                    \
1140              "r" ((USItype) (__n0)),                                    \
1141              "r" ((USItype) (__d)))
1142 #else
1143 #if defined (__sparc_v8__)
/* umul_ppmm (w1, w0, u, v), __sparc_v8__ variant: `umul' multiplies U by V
   into W0, then `rd %y' copies the %y register into W1.  */
1144 #define umul_ppmm(w1, w0, u, v) \
1145   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1146            : "=r" ((USItype) (w1)),                                     \
1147              "=r" ((USItype) (w0))                                      \
1148            : "r" ((USItype) (u)),                                       \
1149              "r" ((USItype) (v)))
/* udiv_qrnnd (__q, __r, __n1, __n0, __d), __sparc_v8__ variant: moves
   __n1 into %y, executes three `nop's, then `udiv' writes __q and the
   remainder is formed as __n0 - __q * __d.  NOTE(review): the nops
   presumably cover the write-to-%y delay mentioned for `wr' elsewhere in
   this file -- confirm.  Both outputs are earlyclobber.  */
1150 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1151   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1152            : "=&r" ((USItype) (__q)),                                   \
1153              "=&r" ((USItype) (__r))                                    \
1154            : "r" ((USItype) (__n1)),                                    \
1155              "r" ((USItype) (__n0)),                                    \
1156              "r" ((USItype) (__d)))
1157 #else
1158 #if defined (__sparclite__)
1159 /* This has hardware multiply but not divide.  It also has two additional
1160    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (w1, w0, u, v), __sparclite__ variant: `umul' multiplies U by
   V into W0, then `rd %y' copies the %y register into W1.  */
1161 #define umul_ppmm(w1, w0, u, v) \
1162   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1163            : "=r" ((USItype) (w1)),                                     \
1164              "=r" ((USItype) (w0))                                      \
1165            : "r" ((USItype) (u)),                                       \
1166              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d), __sparclite__ variant: divide the two-word
   value N1:N0 by D using 32 `divscc' steps.  N1 is written to %y, the
   steps accumulate in %g1 and the last one writes Q; R is then read back
   from %y and, when the annulled `bl,a' branch is taken, corrected by
   adding D.
   Q and R are both written before the final `add %1,%4,%1' reads D, so
   they are marked earlyclobber ("=&r") to stop the compiler from
   allocating either of them to D's register.  %g1 and the condition
   codes are clobbered.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
"       tst     %%g0\n"                                                 \
"       divscc  %3,%4,%%g1\n"                                           \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%0\n"                                           \
"       rd      %%y,%1\n"                                               \
"       bl,a 1f\n"                                                      \
"       add     %1,%4,%1\n"                                             \
"1:     ! End of inline udiv_qrnnd"                                     \
           : "=&r" ((USItype) (q)),                                     \
             "=&r" ((USItype) (r))                                      \
           : "r" ((USItype) (n1)),                                      \
             "r" ((USItype) (n0)),                                      \
             "rI" ((USItype) (d))                                       \
           : "g1" __AND_CLOBBER_CC)
/* Cost figure for the division above.  */
#define UDIV_TIME 37
/* count_leading_zeros (count, x), __sparclite__ variant: a single
   `scan %1,1,%0' instruction on X whose result is stored in COUNT.  */
1214 #define count_leading_zeros(count, x) \
1215   do {                                                                  \
1216   __asm__ ("scan %1,1,%0"                                               \
1217            : "=r" ((USItype) (count))                                   \
1218            : "r" ((USItype) (x)));                                      \
1219   } while (0)
1220 /* Early sparclites return 63 for an argument of 0, but they warn that future
1221    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1222    undefined.  */
1223 #else
1224 /* SPARC without integer multiplication and divide instructions.
1225    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (w1, w0, u, v): software multiply built from `mulscc' steps.
   U is written to %y and the partial product is accumulated in %g1 over
   32 `mulscc' steps using V plus one final step with operand 0.  %o5 is
   set to U masked by the sign of V (`sra %3,31' then `and') and is added
   to %g1 to form W1; W0 is read back from %y.  %g1, %o5 and the condition
   codes are clobbered.  The first instructions are order-sensitive, as
   their inline remarks state.  */
1226 #define umul_ppmm(w1, w0, u, v) \
1227   __asm__ ("! Inlined umul_ppmm\n"                                      \
1228 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1229 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1230 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1231 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1232 "       mulscc  %%g1,%3,%%g1\n"                                         \
1233 "       mulscc  %%g1,%3,%%g1\n"                                         \
1234 "       mulscc  %%g1,%3,%%g1\n"                                         \
1235 "       mulscc  %%g1,%3,%%g1\n"                                         \
1236 "       mulscc  %%g1,%3,%%g1\n"                                         \
1237 "       mulscc  %%g1,%3,%%g1\n"                                         \
1238 "       mulscc  %%g1,%3,%%g1\n"                                         \
1239 "       mulscc  %%g1,%3,%%g1\n"                                         \
1240 "       mulscc  %%g1,%3,%%g1\n"                                         \
1241 "       mulscc  %%g1,%3,%%g1\n"                                         \
1242 "       mulscc  %%g1,%3,%%g1\n"                                         \
1243 "       mulscc  %%g1,%3,%%g1\n"                                         \
1244 "       mulscc  %%g1,%3,%%g1\n"                                         \
1245 "       mulscc  %%g1,%3,%%g1\n"                                         \
1246 "       mulscc  %%g1,%3,%%g1\n"                                         \
1247 "       mulscc  %%g1,%3,%%g1\n"                                         \
1248 "       mulscc  %%g1,%3,%%g1\n"                                         \
1249 "       mulscc  %%g1,%3,%%g1\n"                                         \
1250 "       mulscc  %%g1,%3,%%g1\n"                                         \
1251 "       mulscc  %%g1,%3,%%g1\n"                                         \
1252 "       mulscc  %%g1,%3,%%g1\n"                                         \
1253 "       mulscc  %%g1,%3,%%g1\n"                                         \
1254 "       mulscc  %%g1,%3,%%g1\n"                                         \
1255 "       mulscc  %%g1,%3,%%g1\n"                                         \
1256 "       mulscc  %%g1,%3,%%g1\n"                                         \
1257 "       mulscc  %%g1,%3,%%g1\n"                                         \
1258 "       mulscc  %%g1,%3,%%g1\n"                                         \
1259 "       mulscc  %%g1,%3,%%g1\n"                                         \
1260 "       mulscc  %%g1,%3,%%g1\n"                                         \
1261 "       mulscc  %%g1,%3,%%g1\n"                                         \
1262 "       mulscc  %%g1,%3,%%g1\n"                                         \
1263 "       mulscc  %%g1,%3,%%g1\n"                                         \
1264 "       mulscc  %%g1,0,%%g1\n"                                          \
1265 "       add     %%g1,%%o5,%0\n"                                         \
1266 "       rd      %%y,%1"                                                 \
1267            : "=r" ((USItype) (w1)),                                     \
1268              "=r" ((USItype) (w0))                                      \
1269            : "%rI" ((USItype) (u)),                                     \
1270              "r" ((USItype) (v))                                                \
1271            : "g1", "o5" __AND_CLOBBER_CC)
1272 #define UMUL_TIME 39            /* 39 instructions */
1273 /* It's quite necessary to add this much assembler for the sparc.
1274    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (__q, __r, __n1, __n0, __d): bit-at-a-time division loop.
   __q starts out holding __n0 and __r starts out holding __n1 (matching
   constraints "0" and "1"); %g1 is the loop counter, initialised to 32.
   Each pass shifts one quotient bit into the low end of __q, and the
   closing `xnor %0,0,%0' inverts every bit of __q.  %g1 and the condition
   codes are clobbered.  The instruction order, including the indented
   branch delay slots, must be kept exactly as written.  */
1275 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1276   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1277 "       mov     32,%%g1\n"                                              \
1278 "       subcc   %1,%2,%%g0\n"                                           \
1279 "1:     bcs     5f\n"                                                   \
1280 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1281 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1282 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1283 "       subcc   %%g1,1,%%g1\n"                                          \
1284 "2:     bne     1b\n"                                                   \
1285 "        subcc  %1,%2,%%g0\n"                                           \
1286 "       bcs     3f\n"                                                   \
1287 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1288 "       b       3f\n"                                                   \
1289 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1290 "4:     sub     %1,%2,%1\n"                                             \
1291 "5:     addxcc  %1,%1,%1\n"                                             \
1292 "       bcc     2b\n"                                                   \
1293 "        subcc  %%g1,1,%%g1\n"                                          \
1294 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1295 "       bne     4b\n"                                                   \
1296 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1297 "       sub     %1,%2,%1\n"                                             \
1298 "3:     xnor    %0,0,%0\n"                                              \
1299 "       ! End of inline udiv_qrnnd"                                     \
1300            : "=&r" ((USItype) (__q)),                                   \
1301              "=&r" ((USItype) (__r))                                    \
1302            : "r" ((USItype) (__d)),                                     \
1303              "1" ((USItype) (__n1)),                                    \
1304              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1305 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1306 #endif /* __sparclite__ */
1307 #endif /* __sparc_v8__ */
1308 #endif /* __sparc_v9__ */
1309 #endif /* sparc32 */
1310
1311 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1312     && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl), 64-bit SPARC: two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  `addcc' adds the low words into SL,
   `add' adds the high words into SH, `movcs %xcc' sets the local __carry
   (initialised to 0) to 1 when the low addition carried, and the final
   `add' folds __carry into SH.  The addend pairs are marked commutative
   with `%'.  */
1313 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1314   do {                                                                  \
1315     UDItype __carry = 0;                                                \
1316     __asm__ ("addcc\t%r5,%6,%1\n\t"                                     \
1317              "add\t%r3,%4,%0\n\t"                                       \
1318              "movcs\t%%xcc, 1, %2\n\t"                                  \
1319              "add\t%0, %2, %0"                                          \
1320              : "=r" ((UDItype)(sh)),                                    \
1321                "=&r" ((UDItype)(sl)),                                   \
1322                "+r" (__carry)                                           \
1323              : "%rJ" ((UDItype)(ah)),                                   \
1324                "rI" ((UDItype)(bh)),                                    \
1325                "%rJ" ((UDItype)(al)),                                   \
1326                "rI" ((UDItype)(bl))                                     \
1327              __CLOBBER_CC);                                             \
1328   } while (0)
1329
/* sub_ddmmss (sh, sl, ah, al, bh, bl), 64-bit SPARC: two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  `subcc' subtracts the low words into SL,
   `sub' subtracts the high words into SH, `movcs %xcc' sets the local
   __carry (initialised to 0) to 1 when the low subtraction borrowed, and
   the final `sub' removes that borrow from SH.
   The minuend operands deliberately carry no `%' (commutative) modifier:
   subtraction is not commutative, so the compiler must never be allowed
   to exchange AH with BH or AL with BL.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
1346
/* umul_ppmm (wh, wl, u, v), 64-bit SPARC: 64x64->128 multiply assembled
   from `mulx' partial products.  U (operand 6) and V (operand 7) are split
   into 32-bit halves with `srl x,0' and `srlx x,32'; the partial products
   are combined with shifts and `addcc', with `movcc'/`sethi' producing the
   carry term that is added into WH by the last instruction.  tmp1..tmp4
   are earlyclobber scratch registers.
   NOTE(review): the scratch names are not `__'-prefixed, so a caller
   argument spelled tmp1..tmp4 would be captured by the macro's locals.  */
1347 #define umul_ppmm(wh, wl, u, v)                                         \
1348   do {                                                                  \
1349           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1350           __asm__ __volatile__ (                                        \
1351                    "srl %7,0,%3\n\t"                                    \
1352                    "mulx %3,%6,%1\n\t"                                  \
1353                    "srlx %6,32,%2\n\t"                                  \
1354                    "mulx %2,%3,%4\n\t"                                  \
1355                    "sllx %4,32,%5\n\t"                                  \
1356                    "srl %6,0,%3\n\t"                                    \
1357                    "sub %1,%5,%5\n\t"                                   \
1358                    "srlx %5,32,%5\n\t"                                  \
1359                    "addcc %4,%5,%4\n\t"                                 \
1360                    "srlx %7,32,%5\n\t"                                  \
1361                    "mulx %3,%5,%3\n\t"                                  \
1362                    "mulx %2,%5,%5\n\t"                                  \
1363                    "sethi %%hi(0x80000000),%2\n\t"                      \
1364                    "addcc %4,%3,%4\n\t"                                 \
1365                    "srlx %4,32,%4\n\t"                                  \
1366                    "add %2,%2,%2\n\t"                                   \
1367                    "movcc %%xcc,%%g0,%2\n\t"                            \
1368                    "addcc %5,%4,%5\n\t"                                 \
1369                    "sllx %3,32,%3\n\t"                                  \
1370                    "add %1,%3,%1\n\t"                                   \
1371                    "add %5,%2,%0"                                       \
1372            : "=r" ((UDItype)(wh)),                                      \
1373              "=&r" ((UDItype)(wl)),                                     \
1374              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1375            : "r" ((UDItype)(u)),                                        \
1376              "r" ((UDItype)(v))                                         \
1377            __CLOBBER_CC);                                               \
1378   } while (0)
/* Cost figures for multiply and divide on this port.  */
1379 #define UMUL_TIME 96
1380 #define UDIV_TIME 230
1381 #endif /* sparc64 */
1382
1383 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl), VAX: two-word addition
   (SH,SL) = (AH,AL) + (BH,BL).  SH and SL start out holding AH and AL
   (matching constraints "%0"/"%1", commutative with BH/BL); `addl2' adds
   BL into SL and `adwc' adds BH into SH.  SL is earlyclobber.  */
1384 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1385   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1386            : "=g" ((USItype) (sh)),                                     \
1387              "=&g" ((USItype) (sl))                                     \
1388            : "%0" ((USItype) (ah)),                                     \
1389              "g" ((USItype) (bh)),                                      \
1390              "%1" ((USItype) (al)),                                     \
1391              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl), VAX: two-word subtraction
   (SH,SL) = (AH,AL) - (BH,BL).  SH and SL start out holding AH and AL
   (matching constraints "0"/"1", not commutative); `subl2' subtracts BL
   from SL and `sbwc' subtracts BH from SH.  SL is earlyclobber.  */
1392 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1393   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1394            : "=g" ((USItype) (sh)),                                     \
1395              "=&g" ((USItype) (sl))                                     \
1396            : "0" ((USItype) (ah)),                                      \
1397              "g" ((USItype) (bh)),                                      \
1398              "1" ((USItype) (al)),                                      \
1399              "g" ((USItype) (bl)))
/* umul_ppmm (xh, xl, m0, m1), VAX: M0 and M1 are copied into locals (each
   argument is evaluated once), `emul' writes their product into the
   64-bit member of a union, and XH/XL are read back from the union's
   __h/__l halves.  XH is then adjusted by adding M1 when the top bit of
   M0 is set and M0 when the top bit of M1 is set -- the same correction
   this file uses to derive an unsigned product from a signed one.  */
1400 #define umul_ppmm(xh, xl, m0, m1) \
1401   do {                                                                  \
1402     union {                                                             \
1403         UDItype __ll;                                                   \
1404         struct {USItype __l, __h;} __i;                                 \
1405       } __xx;                                                           \
1406     USItype __m0 = (m0), __m1 = (m1);                                   \
1407     __asm__ ("emul %1,%2,$0,%0"                                         \
1408              : "=r" (__xx.__ll)                                         \
1409              : "g" (__m0),                                              \
1410                "g" (__m1));                                             \
1411     (xh) = __xx.__i.__h;                                                \
1412     (xl) = __xx.__i.__l;                                                \
1413     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1414              + (((SItype) __m1 >> 31) & __m0));                         \
1415   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d), VAX: N1 and N0 are stored into the high
   and low halves of a signed 64-bit union, and `ediv' is issued with D,
   that 64-bit value, Q and R as its operands, in that order.  */
1416 #define sdiv_qrnnd(q, r, n1, n0, d) \
1417   do {                                                                  \
1418     union {DItype __ll;                                                 \
1419            struct {SItype __l, __h;} __i;                               \
1420           } __xx;                                                       \
1421     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1422     __asm__ ("ediv %3,%2,%0,%1"                                         \
1423              : "=g" (q), "=g" (r)                                       \
1424              : "g" (__xx.__ll), "g" (d));                               \
1425   } while (0)
1426 #endif /* __vax__ */
1427
1428 #ifdef _TMS320C6X
/* add_ssaaaa (sh, sl, ah, al, bh, bl), TMS320C6X: `addu' adds AL and BL
   into the 64-bit temporary __ll.  SL receives the low 32 bits of __ll;
   SH receives the bits above 32 (the carry) plus AH plus BH.  */
1429 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1430   do                                                                    \
1431     {                                                                   \
1432       UDItype __ll;                                                     \
1433       __asm__ ("addu .l1 %1, %2, %0"                                    \
1434                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1435       (sl) = (USItype)__ll;                                             \
1436       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1437     }                                                                   \
1438   while (0)
1439
1440 #ifdef _TMS320C6400_PLUS
/* __umulsidi3 (u, v): full 32x32->64 unsigned product of U and V.
   Each argument is parenthesized before the cast so that expression
   arguments such as `a + b' are converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* umul_ppmm (w1, w0, u, v): plain C 32x32->64 multiply.  U and V are each
   converted to USItype, multiplied in UDItype, and the product is split
   into its upper word (W1) and lower word (W0).  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    USItype __lhs = (USItype) (u);                                      \
    USItype __rhs = (USItype) (v);                                      \
    UDItype __wide = (UDItype) __lhs * __rhs;                           \
    (w1) = (USItype) (__wide >> 32);                                    \
    (w0) = (USItype) __wide;                                            \
  } while (0)
1448 #endif  /* _TMS320C6400_PLUS */
1449
/* Bit-count helpers forwarded to the compiler builtins; each macro is an
   assignment expression that stores the builtin's result in COUNT.
   count_trailing_zeros is only provided when _TMS320C6400 is defined.
   NOTE(review): GCC documents __builtin_clz/__builtin_ctz as undefined
   for a zero argument, and no COUNT_LEADING_ZEROS_0 is defined here.  */
1450 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1451 #ifdef _TMS320C6400
1452 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1453 #endif
/* Cost figures for multiply and divide on this port.  */
1454 #define UMUL_TIME 4
1455 #define UDIV_TIME 40
1456 #endif /* _TMS320C6X */
1457
1458 #if defined (__xtensa__) && W_TYPE_SIZE == 32
/* Nothing here depends on a particular Xtensa configuration: the compiler
   builtins are used throughout and the compiler decides, per
   configuration, whether each one expands in-line or becomes a library
   call.  */
/* umul_ppmm (w1, w0, u, v): obtain the double-word product of U and V
   from __builtin_umulsidi3 and hand back its high and low words.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __prod;                                                     \
    __prod.ll = __builtin_umulsidi3 (u, v);                             \
    w1 = __prod.s.high;                                                 \
    w0 = __prod.s.low;                                                  \
  } while (0)
/* Direct forwards to the compiler builtins.  The two count macros are
   assignment expressions that store the builtin's result in COUNT.
   NOTE(review): GCC documents __builtin_clz/__builtin_ctz as undefined
   for a zero argument.  */
1470 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1471 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1472 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1473 #endif /* __xtensa__ */
1474
1475 #if defined xstormy16
/* NOTE(review): this prototype is not referenced by the
   count_leading_zeros macro that follows it in this section, which calls
   __clzhi2 instead (and __clzhi2 has no visible declaration in this
   chunk) -- confirm which helper is intended.  */
1476 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros (count, x): count the leading zero bits of X by
   scanning it in 16-bit pieces from the most significant end.  Each piece
   is passed to __clzhi2; its result is added to COUNT and the scan stops
   at the first piece that is not entirely zero (result != 16).  For
   X == 0 every piece contributes 16, giving W_TYPE_SIZE, which is what
   COUNT_LEADING_ZEROS_0 records.
   The locals use reserved `__' names so that a caller argument called
   `size' or `c' is not captured by the macro's own variables.  X is
   expanded once per loop pass and must be free of side effects.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_part;                                           \
                                                                        \
          __clz_part = __clzhi2 ((x) >> (__clz_size - 16));             \
          (count) += __clz_part;                                        \
          if (__clz_part != 16)                                         \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1495 #endif
1496
1497 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (sh, sl, ah, al, bh, bl), Z8000 with 16-bit words: two-word
   addition (SH,SL) = (AH,AL) + (BH,BL).  SH and SL start out holding AH
   and AL (matching constraints "%0"/"%1", commutative with BH/BL); `add'
   adds BL into SL and `adc' adds BH into SH.  SL is earlyclobber.  */
1498 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1499   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1500            : "=r" ((unsigned int)(sh)),                                 \
1501              "=&r" ((unsigned int)(sl))                                 \
1502            : "%0" ((unsigned int)(ah)),                                 \
1503              "r" ((unsigned int)(bh)),                                  \
1504              "%1" ((unsigned int)(al)),                                 \
1505              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl), Z8000 with 16-bit words: two-word
   subtraction (SH,SL) = (AH,AL) - (BH,BL).  SH and SL start out holding
   AH and AL (matching constraints "0"/"1", not commutative); `sub'
   subtracts BL from SL and `sbc' subtracts BH from SH.  SL is
   earlyclobber.  */
1506 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1507   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1508            : "=r" ((unsigned int)(sh)),                                 \
1509              "=&r" ((unsigned int)(sl))                                 \
1510            : "0" ((unsigned int)(ah)),                                  \
1511              "r" ((unsigned int)(bh)),                                  \
1512              "1" ((unsigned int)(al)),                                  \
1513              "rQR" ((unsigned int)(bl)))
/* umul_ppmm (xh, xl, m0, m1), Z8000 with 16-bit words: M0 and M1 are
   copied into locals (each argument is evaluated once) and `mult' leaves
   the product in the __h/__l halves of a union, from which XH and XL are
   read.  XH is then adjusted by adding M1 when bit 15 of M0 is set and M0
   when bit 15 of M1 is set -- the same correction this file uses to
   derive an unsigned product from a signed one.  */
1514 #define umul_ppmm(xh, xl, m0, m1) \
1515   do {                                                                  \
1516     union {long int __ll;                                               \
1517            struct {unsigned int __h, __l;} __i;                         \
1518           } __xx;                                                       \
1519     unsigned int __m0 = (m0), __m1 = (m1);                              \
1520     __asm__ ("mult      %S0,%H3"                                        \
1521              : "=r" (__xx.__i.__h),                                     \
1522                "=r" (__xx.__i.__l)                                      \
1523              : "%1" (__m0),                                             \
1524                "rQR" (__m1));                                           \
1525     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1526     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1527              + (((signed int) __m1 >> 15) & __m0));                     \
1528   } while (0)
1529 #endif /* __z8000__ */
1530
1531 #endif /* __GNUC__ */
1532
1533 /* If this machine has no inline assembler, use C macros.  */
1534
#ifndef add_ssaaaa
/* Two-word addition: (sh,sl) = (ah,al) + (bh,bl).  The low words are
   added first; if the UWtype sum wrapped around it is smaller than al,
   and that comparison result (0 or 1) is the carry added into the high
   word.  sh is stored before sl, and both only after al has been read.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __lo_sum = (al) + (bl);                                      \
    (sh) = (ah) + (bh) + (__lo_sum < (al));                             \
    (sl) = __lo_sum;                                                    \
  } while (0)
#endif
1544
#ifndef sub_ddmmss
/* Two-word subtraction: (sh,sl) = (ah,al) - (bh,bl).  The low words are
   subtracted first; if the UWtype difference wrapped around it is larger
   than al, and that comparison result (0 or 1) is the borrow taken from
   the high word.  sh is stored before sl, and both only after al has been
   read.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    UWtype __lo_diff = (al) - (bl);                                     \
    (sh) = (ah) - (bh) - (__lo_diff > (al));                            \
    (sl) = __lo_diff;                                                   \
  } while (0)
#endif
1554
/* No umul_ppmm yet, but a signed smul_ppmm exists: build the unsigned
   product from the signed one.  The low word is taken as delivered by
   smul_ppmm; the high word is corrected by adding v when the top bit of u
   is set and u when the top bit of v is set (each mask below is all-ones
   or zero).  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __us_hi;                                                     \
    UWtype __us_u = (u);                                                \
    UWtype __us_v = (v);                                                \
    smul_ppmm (__us_hi, w0, __us_u, __us_v);                            \
    (w1) = __us_hi                                                      \
           + (-(__us_u >> (W_TYPE_SIZE - 1)) & __us_v)                  \
           + (-(__us_v >> (W_TYPE_SIZE - 1)) & __us_u);                 \
  } while (0)
#endif
1567
/* Last resort: umul_ppmm in plain C.  Each operand is split into half
   words, the four half-by-half partial products are formed in UWtype, and
   the two cross products are folded into the middle of the result.  w1
   receives the high word and w0 the low word; u and v are each read twice
   and both outputs are stored only after all reads.  */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __u_lo = __ll_lowpart (u);                                  \
    UHWtype __u_hi = __ll_highpart (u);                                 \
    UHWtype __v_lo = __ll_lowpart (v);                                  \
    UHWtype __v_hi = __ll_highpart (v);                                 \
                                                                        \
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;                           \
    UWtype __p_lh = (UWtype) __u_lo * __v_hi;                           \
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;                           \
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;                           \
                                                                        \
    /* Adding the upper half of the low product cannot overflow.  */    \
    __p_lh += __ll_highpart (__p_ll);                                   \
    /* Adding the other cross product can; a wrapped sum is smaller     \
       than the addend, and the lost carry is worth __ll_B in the       \
       high product.  */                                                \
    __p_lh += __p_hl;                                                   \
    if (__p_lh < __p_hl)                                                \
      {                                                                 \
        __p_hh += __ll_B;                                               \
      }                                                                 \
                                                                        \
    (w1) = __p_hh + __ll_highpart (__p_lh);                             \
    (w0) = __ll_lowpart (__p_lh) * __ll_B + __ll_lowpart (__p_ll);      \
  } while (0)
#endif
1594
#ifndef __umulsidi3
/* Expression form of umul_ppmm: multiplies u by v and yields the whole
   double-word product.  The two halves are written into the high/low
   members of a DWunion and the value is read back through its ll member
   (GNU statement expression).  */
#define __umulsidi3(u, v)                                               \
  ({                                                                    \
    DWunion __dw_product;                                               \
    umul_ppmm (__dw_product.s.high, __dw_product.s.low, u, v);          \
    __dw_product.ll;                                                    \
  })
#endif
1601
/* Portable C division step, defined unconditionally so that it is always
   available for debugging.  Divides the two-word value (n1,n0) by d and
   stores the quotient in q and the remainder in r.  The quotient is built
   from two half-word digits: each digit is first estimated by dividing by
   the high half of d, then lowered (at most twice) while the estimate
   times the low half of d exceeds the running remainder.  q and r are
   stored only after every read of n1, n0 and d.
   NOTE(review): presumably requires n1 < d and d with its top bit set
   (the default udiv_qrnnd below sets UDIV_NEEDS_NORMALIZATION to 1) --
   confirm against the header's interface description.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d)                                 \
  do {                                                                  \
    UWtype __dv_hi, __dv_lo;                                            \
    UWtype __qd_hi, __qd_lo;                                            \
    UWtype __rm_hi, __rm_lo, __qxd;                                     \
                                                                        \
    __dv_hi = __ll_highpart (d);                                        \
    __dv_lo = __ll_lowpart (d);                                         \
                                                                        \
    /* High quotient digit, from n1 and the upper half of n0.  */       \
    __rm_hi = (n1) % __dv_hi;                                           \
    __qd_hi = (n1) / __dv_hi;                                           \
    __qxd = (UWtype) __qd_hi * __dv_lo;                                 \
    __rm_hi = __rm_hi * __ll_B | __ll_highpart (n0);                    \
    if (__rm_hi < __qxd)                                                \
      {                                                                 \
        __qd_hi--;                                                      \
        __rm_hi += (d);                                                 \
        /* Second correction only if the addition above did not         \
           wrap around (i.e. produced no carry).  */                    \
        if (__rm_hi >= (d) && __rm_hi < __qxd)                          \
          {                                                             \
            __qd_hi--;                                                  \
            __rm_hi += (d);                                             \
          }                                                             \
      }                                                                 \
    __rm_hi -= __qxd;                                                   \
                                                                        \
    /* Low quotient digit, from that remainder and the lower half       \
       of n0.  */                                                       \
    __rm_lo = __rm_hi % __dv_hi;                                        \
    __qd_lo = __rm_hi / __dv_hi;                                        \
    __qxd = (UWtype) __qd_lo * __dv_lo;                                 \
    __rm_lo = __rm_lo * __ll_B | __ll_lowpart (n0);                     \
    if (__rm_lo < __qxd)                                                \
      {                                                                 \
        __qd_lo--;                                                      \
        __rm_lo += (d);                                                 \
        if (__rm_lo >= (d) && __rm_lo < __qxd)                          \
          {                                                             \
            __qd_lo--;                                                  \
            __rm_lo += (d);                                             \
          }                                                             \
      }                                                                 \
    __rm_lo -= __qxd;                                                   \
                                                                        \
    (q) = (UWtype) __qd_hi * __ll_B | __qd_lo;                          \
    (r) = __rm_lo;                                                      \
  } while (0)
1639
/* The processor offers sdiv_qrnnd but no udiv_qrnnd: route unsigned
   division through __udiv_w_sdiv (provided by libgcc or elsewhere).  The
   call's return value becomes the quotient; the remainder comes back
   through a temporary whose address is passed as the first argument.
   NOTE(review): the temporary is USItype rather than UWtype -- presumably
   to match the prototype of __udiv_w_sdiv; confirm before changing.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d)                                     \
  do {                                                                  \
    USItype __sdiv_rem;                                                 \
    (q) = __udiv_w_sdiv (&__sdiv_rem, nh, nl, d);                       \
    (r) = __sdiv_rem;                                                   \
  } while (0)
#endif
1650
/* Nothing above supplied udiv_qrnnd for this processor: fall back to the
   C implementation __udiv_qrnnd_c, and set UDIV_NEEDS_NORMALIZATION to 1
   to go with it.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1656
#ifndef count_leading_zeros
/* Table-driven count_leading_zeros in plain C.  A shift amount is chosen
   so that x shifted right by it is a valid index into __clz_tab: for
   words of at most 32 bits by comparing x against the quarter-word
   boundaries (multiples of __BITS4), for wider words by scanning down
   from the top in 8-bit steps until a non-zero byte is found.  count is
   then W_TYPE_SIZE minus (table entry + shift).  When the table entry for
   0 is 0, an input of 0 gives W_TYPE_SIZE, matching COUNT_LEADING_ZEROS_0
   (table contents are defined elsewhere -- confirm).  */
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    UWtype __clz_x = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_x >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
    else if (__clz_x < ((UWtype)1<<2*__BITS4))                          \
      __clz_sh = __clz_x < ((UWtype)1<<__BITS4) ? 0 : __BITS4;          \
    else                                                                \
      __clz_sh = __clz_x < ((UWtype)1<<3*__BITS4)                       \
                 ? 2*__BITS4 : 3*__BITS4;                               \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_x >> __clz_sh] + __clz_sh);\
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1680
#ifndef count_trailing_zeros
/* count_trailing_zeros expressed through count_leading_zeros (which may be
   an asm version or the C fallback).  x & -x keeps only the lowest set bit
   of x; if that bit has c leading zeros, its position -- the number of
   trailing zeros of x -- is W_TYPE_SIZE - 1 - c.  */
#define count_trailing_zeros(count, x)                                  \
  do {                                                                  \
    UWtype __ctz_in = (x);                                              \
    UWtype __ctz_lowbit = __ctz_in & -__ctz_in;                         \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_lowbit);                       \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
#endif
1692
/* Default: unless something above set UDIV_NEEDS_NORMALIZATION, define it
   as 0.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif