gcc/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
  36    UDWtype -- An unsigned type, at least twice as large as UWtype
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
  46 #define __BITS4 (W_TYPE_SIZE / 4) /* One quarter of the word width, in bits.  */
  47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 2^(W_TYPE_SIZE / 2): one past the largest half-word value.  */
  48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* Low W_TYPE_SIZE / 2 bits of T.  */
  49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* T (as a UWtype) shifted right by W_TYPE_SIZE / 2 bits.  */
  50
  51 #ifndef W_TYPE_SIZE /* Nothing configured by the includer: default to a 32-bit word.  */
  52 #define W_TYPE_SIZE     32
  53 #define UWtype          USItype /* Word type.  */
  54 #define UHWtype         USItype /* Half-word values are kept in a full-width type here.  */
  55 #define UDWtype         UDItype /* Double-word type.  */
  56 #endif
  57
  58 /* Used in glibc only.  */
  59 #ifndef attribute_hidden
  60 #define attribute_hidden
  61 #endif
  62
  63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If the implementation also
  79    requires the most significant bit of DENOMINATOR to be 1, the
  80    pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
  94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
  95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
  96    of HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  98    (i.e. carry out) is not stored anywhere, and is lost.
  99
 100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 102    composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 104    and LOW_DIFFERENCE.  Underflow (i.e. borrow out) is not stored anywhere,
 105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
 118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 119    understood by gcc1.  Use cpp to avoid major code duplication.  */
 120 #if __GNUC__ < 2
 121 #define __CLOBBER_CC
 122 #define __AND_CLOBBER_CC
 123 #else /* __GNUC__ >= 2 */
 124 #define __CLOBBER_CC : "cc"
 125 #define __AND_CLOBBER_CC , "cc"
 126 #endif /* __GNUC__ < 2 */
 127
 128 #if defined (__alpha) && W_TYPE_SIZE == 64
 129 #define umul_ppmm(ph, pl, m0, m1) \
 130   do {                                                                  \
 131     UDItype __m0 = (m0), __m1 = (m1);                                   \
 132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
 133     (pl) = __m0 * __m1;                                                 \
 134   } while (0)
 135 #define UMUL_TIME 46
 136 #ifndef LONGLONG_STANDALONE
 137 #define udiv_qrnnd(q, r, n1, n0, d) \
 138   do { UDItype __r;                                                     \
 139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
 140     (r) = __r;                                                          \
 141   } while (0)
 142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
 143 #define UDIV_TIME 220
 144 #endif /* LONGLONG_STANDALONE */
 145 #ifdef __alpha_cix__
 146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
 147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
 148 #define COUNT_LEADING_ZEROS_0 64
 149 #else
 150 #define count_leading_zeros(COUNT,X) \
 151   do {                                                                  \
 152     UDItype __xr = (X), __t, __a;                                       \
 153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
 155     __t = __builtin_alpha_extbl (__xr, __a);                            \
 156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
 157   } while (0)
 158 #define count_trailing_zeros(COUNT,X) \
 159   do {                                                                  \
 160     UDItype __xr = (X), __t, __a;                                       \
 161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 162     __t = ~__t & -~__t;                                                 \
 163     __a = ((__t & 0xCC) != 0) * 2;                                      \
 164     __a += ((__t & 0xF0) != 0) * 4;                                     \
 165     __a += ((__t & 0xAA) != 0);                                         \
 166     __t = __builtin_alpha_extbl (__xr, __a);                            \
 167     __a <<= 3;                                                          \
 168     __t &= -__t;                                                        \
 169     __a += ((__t & 0xCC) != 0) * 2;                                     \
 170     __a += ((__t & 0xF0) != 0) * 4;                                     \
 171     __a += ((__t & 0xAA) != 0);                                         \
 172     (COUNT) = __a;                                                      \
 173   } while (0)
 174 #endif /* __alpha_cix__ */
 175 #endif /* __alpha */
 176
 177 #if defined (__arc__) && W_TYPE_SIZE == 32
 178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 180            : "=r" ((USItype) (sh)),                                     \
 181              "=&r" ((USItype) (sl))                                     \
 182            : "%r" ((USItype) (ah)),                                     \
 183              "rIJ" ((USItype) (bh)),                                    \
 184              "%r" ((USItype) (al)),                                     \
 185              "rIJ" ((USItype) (bl)))
 186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 188            : "=r" ((USItype) (sh)),                                     \
 189              "=&r" ((USItype) (sl))                                     \
 190            : "r" ((USItype) (ah)),                                      \
 191              "rIJ" ((USItype) (bh)),                                    \
 192              "r" ((USItype) (al)),                                      \
 193              "rIJ" ((USItype) (bl)))
 194 /* No inline multiply here: umul_ppmm calls the out-of-line function
     __umulsidi3 (prototyped below) and splits its UDItype result into the
     high word W1 and the low word W0 through a DWunion.  DWunion is not
     declared in this part of the file -- presumably the including file
     provides it (TODO confirm).  Note that W1, W0, U and V are used
     unparenthesized.  The self-referential #define marks __umulsidi3 as
     defined for the preprocessor while leaving calls bound to the real
     function.  */
 195 #define umul_ppmm(w1, w0, u, v) \
 196 do {                                                                    \
 197   DWunion __w;                                                          \
 198   __w.ll = __umulsidi3 (u, v);                                          \
 199   w1 = __w.s.high;                                                      \
 200   w0 = __w.s.low;                                                       \
 201 } while (0)
 202 #define __umulsidi3 __umulsidi3
 203 UDItype __umulsidi3 (USItype, USItype);
 204 #endif
 205
 206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
 207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 209            : "=r" ((USItype) (sh)),                                     \
 210              "=&r" ((USItype) (sl))                                     \
 211            : "%r" ((USItype) (ah)),                                     \
 212              "rI" ((USItype) (bh)),                                     \
 213              "%r" ((USItype) (al)),                                     \
 214              "rI" ((USItype) (bl)) __CLOBBER_CC)
 215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 217            : "=r" ((USItype) (sh)),                                     \
 218              "=&r" ((USItype) (sl))                                     \
 219            : "r" ((USItype) (ah)),                                      \
 220              "rI" ((USItype) (bh)),                                     \
 221              "r" ((USItype) (al)),                                      \
 222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 #define umul_ppmm(xh, xl, a, b) \
 224 {register USItype __t0, __t1, __t2;                                     \
 225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 226            "    mov     %2, %5, lsr #16\n"                              \
 227            "    mov     %0, %6, lsr #16\n"                              \
 228            "    bic     %3, %5, %2, lsl #16\n"                          \
 229            "    bic     %4, %6, %0, lsl #16\n"                          \
 230            "    mul     %1, %3, %4\n"                                   \
 231            "    mul     %4, %2, %4\n"                                   \
 232            "    mul     %3, %0, %3\n"                                   \
 233            "    mul     %0, %2, %0\n"                                   \
 234            "    adds    %3, %4, %3\n"                                   \
 235            "    addcs   %0, %0, #65536\n"                               \
 236            "    adds    %1, %1, %3, lsl #16\n"                          \
 237            "    adc     %0, %0, %3, lsr #16"                            \
 238            : "=&r" ((USItype) (xh)),                                    \
 239              "=r" ((USItype) (xl)),                                     \
 240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 241            : "r" ((USItype) (a)),                                       \
 242              "r" ((USItype) (b)) __CLOBBER_CC );}
 243 #define UMUL_TIME 20
 244 #define UDIV_TIME 100
 245 #endif /* __arm__ */
 246
 247 #if defined(__arm__)
 248 /* Let gcc decide how best to implement count_leading_zeros.  Unlike the
     block above, this guard tests neither __thumb__ nor W_TYPE_SIZE, so
     the definition applies to every __arm__ build.  COUNT_LEADING_ZEROS_0
     gives the result to expect for X == 0.  */
 249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 250 #define COUNT_LEADING_ZEROS_0 32
 251 #endif
 252
 253 #if defined (__AVR__)
     /* On AVR the bit-count macros map straight onto GCC builtins.  The
        int, long or long long flavour is picked according to W_TYPE_SIZE
        (presumably matching a 16-bit int, 32-bit long and 64-bit long long
        on this target -- TODO confirm).  Each variant defines
        COUNT_LEADING_ZEROS_0 to the same value as W_TYPE_SIZE.  No
        definitions are provided for any other W_TYPE_SIZE.  */
 254
 255 #if W_TYPE_SIZE == 16
 256 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 257 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
 258 #define COUNT_LEADING_ZEROS_0 16
 259 #endif /* W_TYPE_SIZE == 16 */
 260
 261 #if W_TYPE_SIZE == 32
 262 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
 263 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
 264 #define COUNT_LEADING_ZEROS_0 32
 265 #endif /* W_TYPE_SIZE == 32 */
 266
 267 #if W_TYPE_SIZE == 64
 268 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
 269 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
 270 #define COUNT_LEADING_ZEROS_0 64
 271 #endif /* W_TYPE_SIZE == 64 */
 272
 273 #endif /* defined (__AVR__) */
 274
 275 #if defined (__CRIS__) && __CRIS_arch_version >= 3 /* Builtin-based bit counts, gated on the CRIS architecture version.  */
 276 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
 277 #if __CRIS_arch_version >= 8 /* count_trailing_zeros is only provided from version 8 on.  */
 278 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
 279 #endif
 280 #endif /* __CRIS__ */
 281
 282 #if defined (__hppa) && W_TYPE_SIZE == 32
 283 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 284   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
 285            : "=r" ((USItype) (sh)),                                     \
 286              "=&r" ((USItype) (sl))                                     \
 287            : "%rM" ((USItype) (ah)),                                    \
 288              "rM" ((USItype) (bh)),                                     \
 289              "%rM" ((USItype) (al)),                                    \
 290              "rM" ((USItype) (bl)))
 291 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 292   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
 293            : "=r" ((USItype) (sh)),                                     \
 294              "=&r" ((USItype) (sl))                                     \
 295            : "rM" ((USItype) (ah)),                                     \
 296              "rM" ((USItype) (bh)),                                     \
 297              "rM" ((USItype) (al)),                                     \
 298              "rM" ((USItype) (bl)))
 299 #if defined (_PA_RISC1_1)
 300 #define umul_ppmm(w1, w0, u, v) \
 301   do {                                                                  \
 302     union                                                               \
 303       {                                                                 \
 304         UDItype __f;                                                    \
 305         struct {USItype __w1, __w0;} __w1w0;                            \
 306       } __t;                                                            \
 307     __asm__ ("xmpyu %1,%2,%0"                                           \
 308              : "=x" (__t.__f)                                           \
 309              : "x" ((USItype) (u)),                                     \
 310                "x" ((USItype) (v)));                                    \
 311     (w1) = __t.__w1w0.__w1;                                             \
 312     (w0) = __t.__w1w0.__w0;                                             \
 313      } while (0)
 314 #define UMUL_TIME 8
 315 #else
 316 #define UMUL_TIME 30
 317 #endif
 318 #define UDIV_TIME 40
 319 #define count_leading_zeros(count, x) \
 320   do {                                                                  \
 321     USItype __tmp;                                                      \
 322     __asm__ (                                                           \
 323        "ldi             1,%0\n"                                         \
 324 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
 325 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
 326 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
 327 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
 328 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
 329 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
 330 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
 331 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
 332 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
 333 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
 334 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
 335 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
 336 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
 337 "       sub             %0,%1,%0                ; Subtract it.\n"       \
 338         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
 339   } while (0)
 340 #endif
 341
 342 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
 343 #if !defined (__zarch__)
 344 #define smul_ppmm(xh, xl, m0, m1) \
 345   do {                                                                  \
 346     union {DItype __ll;                                                 \
 347            struct {USItype __h, __l;} __i;                              \
 348           } __x;                                                        \
 349     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
 350              : "=&r" (__x.__ll)                                         \
 351              : "r" (m0), "r" (m1));                                     \
 352     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
 353   } while (0)
 354 #define sdiv_qrnnd(q, r, n1, n0, d) \
 355   do {                                                                  \
 356     union {DItype __ll;                                                 \
 357            struct {USItype __h, __l;} __i;                              \
 358           } __x;                                                        \
 359     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
 360     __asm__ ("dr %0,%2"                                                 \
 361              : "=r" (__x.__ll)                                          \
 362              : "0" (__x.__ll), "r" (d));                                \
 363     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
 364   } while (0)
 365 #else
 366 #define smul_ppmm(xh, xl, m0, m1) \
 367   do {                                                                  \
 368     register SItype r0 __asm__ ("0");                                   \
 369     register SItype r1 __asm__ ("1") = m0;                              \
 370                                                                         \
 371     __asm__ ("mr\t%%r0,%3"                                              \
 372              : "=r" (r0), "=r" (r1)                                     \
 373              : "r"  (r1),  "r" (m1));                                   \
 374     (xh) = r0; (xl) = r1;                                               \
 375   } while (0)
 376 #define sdiv_qrnnd(q, r, n1, n0, d) \
 377   do {                                                                  \
 378     register SItype r0 __asm__ ("0") = n0;                              \
 379     register SItype r1 __asm__ ("1") = n1;                              \
 380                                                                         \
 381     __asm__ ("dr\t%%r0,%3"                                              \
 382              : "=r" (r0), "=r" (r1)                                     \
 383              : "r" (r0), "r" (r1), "r" (d));                            \
 384     (q) = r0; (r) = r1;                                                 \
 385   } while (0)
 386 #endif /* __zarch__ */
 387 #endif
 388
 389 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
 390 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 391   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
 392            : "=r" ((USItype) (sh)),                                     \
 393              "=&r" ((USItype) (sl))                                     \
 394            : "%0" ((USItype) (ah)),                                     \
 395              "g" ((USItype) (bh)),                                      \
 396              "%1" ((USItype) (al)),                                     \
 397              "g" ((USItype) (bl)))
 398 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 399   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
 400            : "=r" ((USItype) (sh)),                                     \
 401              "=&r" ((USItype) (sl))                                     \
 402            : "0" ((USItype) (ah)),                                      \
 403              "g" ((USItype) (bh)),                                      \
 404              "1" ((USItype) (al)),                                      \
 405              "g" ((USItype) (bl)))
 406 #define umul_ppmm(w1, w0, u, v) \
 407   __asm__ ("mul{l} %3"                                                  \
 408            : "=a" ((USItype) (w0)),                                     \
 409              "=d" ((USItype) (w1))                                      \
 410            : "%0" ((USItype) (u)),                                      \
 411              "rm" ((USItype) (v)))
 412 #define udiv_qrnnd(q, r, n1, n0, dv) \
 413   __asm__ ("div{l} %4"                                                  \
 414            : "=a" ((USItype) (q)),                                      \
 415              "=d" ((USItype) (r))                                       \
 416            : "0" ((USItype) (n0)),                                      \
 417              "1" ((USItype) (n1)),                                      \
 418              "rm" ((USItype) (dv)))
 419 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
 420 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
 421 #define UMUL_TIME 40
 422 #define UDIV_TIME 40
 423 #endif /* 80x86 */
 424
 425 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
 426 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 427   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
 428            : "=r" ((UDItype) (sh)),                                     \
 429              "=&r" ((UDItype) (sl))                                     \
 430            : "%0" ((UDItype) (ah)),                                     \
 431              "rme" ((UDItype) (bh)),                                    \
 432              "%1" ((UDItype) (al)),                                     \
 433              "rme" ((UDItype) (bl)))
 434 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 435   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
 436            : "=r" ((UDItype) (sh)),                                     \
 437              "=&r" ((UDItype) (sl))                                     \
 438            : "0" ((UDItype) (ah)),                                      \
 439              "rme" ((UDItype) (bh)),                                    \
 440              "1" ((UDItype) (al)),                                      \
 441              "rme" ((UDItype) (bl)))
 442 #define umul_ppmm(w1, w0, u, v) \
 443   __asm__ ("mul{q} %3"                                                  \
 444            : "=a" ((UDItype) (w0)),                                     \
 445              "=d" ((UDItype) (w1))                                      \
 446            : "%0" ((UDItype) (u)),                                      \
 447              "rm" ((UDItype) (v)))
 448 #define udiv_qrnnd(q, r, n1, n0, dv) \
 449   __asm__ ("div{q} %4"                                                  \
 450            : "=a" ((UDItype) (q)),                                      \
 451              "=d" ((UDItype) (r))                                       \
 452            : "0" ((UDItype) (n0)),                                      \
 453              "1" ((UDItype) (n1)),                                      \
 454              "rm" ((UDItype) (dv)))
 455 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
 456 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
 457 #define UMUL_TIME 40
 458 #define UDIV_TIME 40
 459 #endif /* x86_64 */
 460
 461 #if defined (__i960__) && W_TYPE_SIZE == 32
 462 #define umul_ppmm(w1, w0, u, v) \
 463   ({union {UDItype __ll;                                                \
 464            struct {USItype __l, __h;} __i;                              \
 465           } __xx;                                                       \
 466   __asm__ ("emul        %2,%1,%0"                                       \
 467            : "=d" (__xx.__ll)                                           \
 468            : "%dI" ((USItype) (u)),                                     \
 469              "dI" ((USItype) (v)));                                     \
 470   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 471 #define __umulsidi3(u, v) \
 472   ({UDItype __w;                                                        \
 473     __asm__ ("emul      %2,%1,%0"                                       \
 474              : "=d" (__w)                                               \
 475              : "%dI" ((USItype) (u)),                                   \
 476                "dI" ((USItype) (v)));                                   \
 477     __w; })
 478 #endif /* __i960__ */
 479
 480 #if defined (__ia64) && W_TYPE_SIZE == 64
 481 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
 482    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
 483    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
 484    register, which takes an extra cycle.  */
 485 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
 486   do {                                                                  \
 487     UWtype __x;                                                         \
 488     __x = (al) - (bl);                                                  \
 489     if ((al) < (bl))                                                    \
 490       (sh) = (ah) - (bh) - 1;                                           \
 491     else                                                                \
 492       (sh) = (ah) - (bh);                                               \
 493     (sl) = __x;                                                         \
 494   } while (0)
 495
 496 /* Do both product parts in assembly, since that gives better code with
 497    all gcc versions.  Some callers will just use the upper part, and in
 498    that situation we waste an instruction, but not any cycles.  */
 499 #define umul_ppmm(ph, pl, m0, m1)                                       \
 500   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
 501            : "=&f" (ph), "=f" (pl)                                      \
 502            : "f" (m0), "f" (m1))
 503 #define count_leading_zeros(count, x)                                   \
 504   do {                                                                  \
 505     UWtype _x = (x), _y, _a, _c;                                        \
 506     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
 507     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
 508     _c = (_a - 1) << 3;                                                 \
 509     _x >>= _c;                                                          \
 510     if (_x >= 1 << 4)                                                   \
 511       _x >>= 4, _c += 4;                                                \
 512     if (_x >= 1 << 2)                                                   \
 513       _x >>= 2, _c += 2;                                                \
 514     _c += _x >> 1;                                                      \
 515     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
 516   } while (0)
 517 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
 518    based, and we don't need a special case for x==0 here */
 519 #define count_trailing_zeros(count, x)                                  \
 520   do {                                                                  \
 521     UWtype __ctz_x = (x);                                               \
 522     __asm__ ("popcnt %0 = %1"                                           \
 523              : "=r" (count)                                             \
 524              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
 525   } while (0)
 526 #define UMUL_TIME 14
 527 #endif
 528
 529 #if defined (__M32R__) && W_TYPE_SIZE == 32
 530 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 531   /* The cmp clears the condition bit.  */ \
 532   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
 533            : "=r" ((USItype) (sh)),                                     \
 534              "=&r" ((USItype) (sl))                                     \
 535            : "0" ((USItype) (ah)),                                      \
 536              "r" ((USItype) (bh)),                                      \
 537              "1" ((USItype) (al)),                                      \
 538              "r" ((USItype) (bl))                                       \
 539            : "cbit")
 540 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 541   /* The cmp clears the condition bit.  The matching constraints preload \
          %0 (sh) with ah and %1 (sl) with al; the first subx then subtracts \
          bl from the low word and the second subx subtracts bh from the \
          high word.  sl is earlyclobber because it is written before bh is \
          read, and the condition bit is listed as clobbered.  */ \
 542   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
 543            : "=r" ((USItype) (sh)),                                     \
 544              "=&r" ((USItype) (sl))                                     \
 545            : "0" ((USItype) (ah)),                                      \
 546              "r" ((USItype) (bh)),                                      \
 547              "1" ((USItype) (al)),                                      \
 548              "r" ((USItype) (bl))                                       \
 549            : "cbit")
 550 #endif /* __M32R__ */
 551
 552 #if defined (__mc68000__) && W_TYPE_SIZE == 32
 553 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add. \
          %1 (sl) is preloaded with al and %0 (sh) with ah through the \
          matching constraints; add adds bl (%5) into the low word, then \
          addx adds bh (%3) into the high word.  The '%' in the "%0"/"%1" \
          constraints marks those inputs as commutative with the operand \
          that follows them.  */ \
 554   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 555            : "=d" ((USItype) (sh)),                                     \
 556              "=&d" ((USItype) (sl))                                     \
 557            : "%0" ((USItype) (ah)),                                     \
 558              "d" ((USItype) (bh)),                                      \
 559              "%1" ((USItype) (al)),                                     \
 560              "g" ((USItype) (bl)))
 561 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract. \
          %1 (sl) is preloaded with al and %0 (sh) with ah through the \
          matching constraints; sub subtracts bl (%5) from the low word, \
          then subx subtracts bh (%3) from the high word.  Unlike the add \
          variant, no operand is marked commutative.  */ \
 562   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 563            : "=d" ((USItype) (sh)),                                     \
 564              "=&d" ((USItype) (sl))                                     \
 565            : "0" ((USItype) (ah)),                                      \
 566              "d" ((USItype) (bh)),                                      \
 567              "1" ((USItype) (al)),                                      \
 568              "g" ((USItype) (bl)))
 569
 570 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.
        Note that the preprocessor test below nevertheless excludes the '060.  */
 571 #if (defined (__mc68020__) && !defined (__mc68060__))
 572 #define umul_ppmm(w1, w0, u, v) /* Double-word product of u and v. \
          u is preloaded into %0 (w0) by the matching constraint and v is \
          operand %3 (data register, memory or immediate); mulu.l writes \
          the register pair %1:%0, i.e. w1:w0.  */ \
 573   __asm__ ("mulu%.l %3,%1:%0"                                           \
 574            : "=d" ((USItype) (w0)),                                     \
 575              "=d" ((USItype) (w1))                                      \
 576            : "%0" ((USItype) (u)),                                      \
 577              "dmi" ((USItype) (v)))
 578 #define UMUL_TIME 45
 579 #define udiv_qrnnd(q, r, n1, n0, d) /* Unsigned two-word divide. \
          n0 is preloaded into %0 (q) and n1 into %1 (r) by the matching \
          constraints, so divu.l sees the pair %1:%0 = n1:n0 and divides it \
          by d (%4); afterwards %0 is read back as q and %1 as r.  */ \
 580   __asm__ ("divu%.l %4,%1:%0"                                           \
 581            : "=d" ((USItype) (q)),                                      \
 582              "=d" ((USItype) (r))                                       \
 583            : "0" ((USItype) (n0)),                                      \
 584              "1" ((USItype) (n1)),                                      \
 585              "dmi" ((USItype) (d)))
 586 #define UDIV_TIME 90
 587 #define sdiv_qrnnd(q, r, n1, n0, d) /* Signed counterpart of udiv_qrnnd. \
          Same operand layout (n0 preloaded into %0/q, n1 into %1/r, d as \
          %4) but using divs.l instead of divu.l.  */ \
 588   __asm__ ("divs%.l %4,%1:%0"                                           \
 589            : "=d" ((USItype) (q)),                                      \
 590              "=d" ((USItype) (r))                                       \
 591            : "0" ((USItype) (n0)),                                      \
 592              "1" ((USItype) (n1)),                                      \
 593              "dmi" ((USItype) (d)))
 594
 595 #elif defined (__mcoldfire__) /* not mc68020 */
 596
 597 #define umul_ppmm(xh, xl, a, b) /* Double-word product of a and b. \
          Built from the four 16x16 mulu partial products of the half-words \
          of a and b, held in d0-d4 (all listed as clobbered); the jcc/add \
          pair propagates the carry of the middle sum into the high word. \
          The low word is stored to %1 (xl) and the high word to %0 (xh). \
          This ColdFire variant clears the low half of d0 with clr.w.  */ \
 598   __asm__ ("| Inlined umul_ppmm\n"                                      \
 599            "    move%.l %2,%/d0\n"                                      \
 600            "    move%.l %3,%/d1\n"                                      \
 601            "    move%.l %/d0,%/d2\n"                                    \
 602            "    swap    %/d0\n"                                         \
 603            "    move%.l %/d1,%/d3\n"                                    \
 604            "    swap    %/d1\n"                                         \
 605            "    move%.w %/d2,%/d4\n"                                    \
 606            "    mulu    %/d3,%/d4\n"                                    \
 607            "    mulu    %/d1,%/d2\n"                                    \
 608            "    mulu    %/d0,%/d3\n"                                    \
 609            "    mulu    %/d0,%/d1\n"                                    \
 610            "    move%.l %/d4,%/d0\n"                                    \
 611            "    clr%.w  %/d0\n"                                         \
 612            "    swap    %/d0\n"                                         \
 613            "    add%.l  %/d0,%/d2\n"                                    \
 614            "    add%.l  %/d3,%/d2\n"                                    \
 615            "    jcc     1f\n"                                           \
 616            "    add%.l  %#65536,%/d1\n"                                 \
 617            "1:  swap    %/d2\n"                                         \
 618            "    moveq   %#0,%/d0\n"                                     \
 619            "    move%.w %/d2,%/d0\n"                                    \
 620            "    move%.w %/d4,%/d2\n"                                    \
 621            "    move%.l %/d2,%1\n"                                      \
 622            "    add%.l  %/d1,%/d0\n"                                    \
 623            "    move%.l %/d0,%0"                                        \
 624            : "=g" ((USItype) (xh)),                                     \
 625              "=g" ((USItype) (xl))                                      \
 626            : "g" ((USItype) (a)),                                       \
 627              "g" ((USItype) (b))                                        \
 628            : "d0", "d1", "d2", "d3", "d4")
 629 #define UMUL_TIME 100
 630 #define UDIV_TIME 400
 631 #else /* not ColdFire */
 632 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.
        umul_ppmm below forms the double-word product of a and b from the four
        16x16 mulu partial products of their half-words, held in d0-d4 (all
        listed as clobbered); the jcc/add pair propagates the carry of the
        middle sum into the high word.  The low word is stored to %1 (xl) and
        the high word to %0 (xh).  This variant clears the low half of d0
        with eor.w.  */
 633 #define umul_ppmm(xh, xl, a, b) \
 634   __asm__ ("| Inlined umul_ppmm\n"                                      \
 635            "    move%.l %2,%/d0\n"                                      \
 636            "    move%.l %3,%/d1\n"                                      \
 637            "    move%.l %/d0,%/d2\n"                                    \
 638            "    swap    %/d0\n"                                         \
 639            "    move%.l %/d1,%/d3\n"                                    \
 640            "    swap    %/d1\n"                                         \
 641            "    move%.w %/d2,%/d4\n"                                    \
 642            "    mulu    %/d3,%/d4\n"                                    \
 643            "    mulu    %/d1,%/d2\n"                                    \
 644            "    mulu    %/d0,%/d3\n"                                    \
 645            "    mulu    %/d0,%/d1\n"                                    \
 646            "    move%.l %/d4,%/d0\n"                                    \
 647            "    eor%.w  %/d0,%/d0\n"                                    \
 648            "    swap    %/d0\n"                                         \
 649            "    add%.l  %/d0,%/d2\n"                                    \
 650            "    add%.l  %/d3,%/d2\n"                                    \
 651            "    jcc     1f\n"                                           \
 652            "    add%.l  %#65536,%/d1\n"                                 \
 653            "1:  swap    %/d2\n"                                         \
 654            "    moveq   %#0,%/d0\n"                                     \
 655            "    move%.w %/d2,%/d0\n"                                    \
 656            "    move%.w %/d4,%/d2\n"                                    \
 657            "    move%.l %/d2,%1\n"                                      \
 658            "    add%.l  %/d1,%/d0\n"                                    \
 659            "    move%.l %/d0,%0"                                        \
 660            : "=g" ((USItype) (xh)),                                     \
 661              "=g" ((USItype) (xl))                                      \
 662            : "g" ((USItype) (a)),                                       \
 663              "g" ((USItype) (b))                                        \
 664            : "d0", "d1", "d2", "d3", "d4")
 665 #define UMUL_TIME 100
 666 #define UDIV_TIME 400
 667
 668 #endif /* not mc68020 */
 669
 670 /* The '020, '030, '040 and '060 have bitfield insns.
 671    cpu32 disguises as a 68020, but lacks them.  */
 672 #if defined (__mc68020__) && !defined (__mcpu32__)
      /* bfffo scans x (%1, memory or data register) and writes its result to
         count (%0).  Operand %2 is the constant 0 and is used for both the
         bit-field offset and the width.
         NOTE(review): a width of 0 presumably selects the full 32-bit field;
         confirm against the 68020 manual.  */
 673 #define count_leading_zeros(count, x) \
 674   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 675            : "=d" ((USItype) (count))                                   \
 676            : "od" ((USItype) (x)), "n" (0))
 677 /* Some ColdFire architectures have a ff1 instruction, which is reached
 678    through __builtin_clz here rather than through inline asm.  */
 679 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 680 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 681 #define COUNT_LEADING_ZEROS_0 32
 682 #endif
 683 #endif /* mc68000 */
 684
 685 #if defined (__m88000__) && W_TYPE_SIZE == 32
 686 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add. \
          The first addu forms sl (%1) from al (%4) and bl (%5), the second \
          forms sh (%0) from ah (%2) and bh (%3); the .co/.ci suffixes \
          presumably select carry-out and carry-in.  sl is earlyclobber \
          because it is written before ah and bh are read.  */ \
 687   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 688            : "=r" ((USItype) (sh)),                                     \
 689              "=&r" ((USItype) (sl))                                     \
 690            : "%rJ" ((USItype) (ah)),                                    \
 691              "rJ" ((USItype) (bh)),                                     \
 692              "%rJ" ((USItype) (al)),                                    \
 693              "rJ" ((USItype) (bl)))
 694 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract. \
          The first subu forms sl (%1) from al (%4) and bl (%5), the second \
          forms sh (%0) from ah (%2) and bh (%3); the .co/.ci suffixes \
          presumably select carry-out and carry-in.  sl is earlyclobber \
          because it is written before ah and bh are read.  */ \
 695   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 696            : "=r" ((USItype) (sh)),                                     \
 697              "=&r" ((USItype) (sl))                                     \
 698            : "rJ" ((USItype) (ah)),                                     \
 699              "rJ" ((USItype) (bh)),                                     \
 700              "rJ" ((USItype) (al)),                                     \
 701              "rJ" ((USItype) (bl)))
 702 #define count_leading_zeros(count, x) /* The ff1 result for x is placed \
          in __cbtmp and then XORed with 31, which equals 31 - result for \
          results in the range 0..31.  */ \
 703   do {                                                                  \
 704     USItype __cbtmp;                                                    \
 705     __asm__ ("ff1 %0,%1"                                                \
 706              : "=r" (__cbtmp)                                           \
 707              : "r" ((USItype) (x)));                                    \
 708     (count) = __cbtmp ^ 31;                                             \
 709   } while (0)
 710 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 711 #if defined (__mc88110__)
 712 #define umul_ppmm(wh, wl, u, v) /* Double-word product of u and v. \
          mulu.d writes a UDItype into __xx.__ll, which is then split \
          through the union: __h is declared first, so the high word is \
          taken from the first half of __ll in memory and the low word \
          from the second.  */ \
 713   do {                                                                  \
 714     union {UDItype __ll;                                                \
 715            struct {USItype __h, __l;} __i;                              \
 716           } __xx;                                                       \
 717     __asm__ ("mulu.d    %0,%1,%2"                                       \
 718              : "=r" (__xx.__ll)                                         \
 719              : "r" ((USItype) (u)),                                     \
 720                "r" ((USItype) (v)));                                    \
 721     (wh) = __xx.__i.__h;                                                \
 722     (wl) = __xx.__i.__l;                                                \
 723   } while (0)
 724 #define udiv_qrnnd(q, r, n1, n0, d) /* Unsigned two-word divide. \
          n1:n0 is assembled into __xx.__ll through the union and divided \
          by d with divu.d, which yields only the quotient __q; the \
          remainder is then computed in C as n0 - __q * d.  Note that n0 \
          and d each appear twice in the expansion.  */ \
 725   ({union {UDItype __ll;                                                \
 726            struct {USItype __h, __l;} __i;                              \
 727           } __xx;                                                       \
 728   USItype __q;                                                          \
 729   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 730   __asm__ ("divu.d %0,%1,%2"                                            \
 731            : "=r" (__q)                                                 \
 732            : "r" (__xx.__ll),                                           \
 733              "r" ((USItype) (d)));                                      \
 734   (r) = (n0) - __q * (d); (q) = __q; })
 735 #define UMUL_TIME 5
 736 #define UDIV_TIME 25
 737 #else
 738 #define UMUL_TIME 17
 739 #define UDIV_TIME 150
 740 #endif /* __mc88110__ */
 741 #endif /* __m88000__ */
 742
 743 #if defined (__mn10300__)
 744 # if defined (__AM33__)
      /* AM33: count_leading_zeros defers to __builtin_clz.  umul_ppmm and
         smul_ppmm use the four-operand mulu/mul forms with w0 as %0, w1 as %1,
         u as %2 and v as %3, all in "r" registers.  The asm statements are
         spelled __asm__, like the rest of this file, so that they also
         compile when the plain asm keyword is disabled (-std=c99, -ansi).  */
 745 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 746 #  define umul_ppmm(w1, w0, u, v)               \
 747     __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 748 #  define smul_ppmm(w1, w0, u, v)               \
 749     __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 750 # else
      /* Other MN10300 cores: two-operand mulu/mul preceded by two nops.  u is
         preloaded into %0 (w0, a "d" register) and marked commutative with v
         (%3); w1 is read back from the register selected by the "z"
         constraint.  */
 751 #  define umul_ppmm(w1, w0, u, v)               \
 752     __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 753 #  define smul_ppmm(w1, w0, u, v)               \
 754     __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 755 # endif
 756 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     /* Two-word add in C: \
          packs (ah,al) and (bh,bl) into DWunion values, adds their .ll \
          members, and unpacks the sum into sh and sl.  */ \
 757   do {                                          \
 758     DWunion __s, __a, __b;                      \
 759     __a.s.low = (al); __a.s.high = (ah);        \
 760     __b.s.low = (bl); __b.s.high = (bh);        \
 761     __s.ll = __a.ll + __b.ll;                   \
 762     (sl) = __s.s.low; (sh) = __s.s.high;        \
 763   } while (0)
 764 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     /* Two-word subtract in C: \
          packs (ah,al) and (bh,bl) into DWunion values, subtracts their \
          .ll members, and unpacks the difference into sh and sl.  */ \
 765   do {                                          \
 766     DWunion __s, __a, __b;                      \
 767     __a.s.low = (al); __a.s.high = (ah);        \
 768     __b.s.low = (bl); __b.s.high = (bh);        \
 769     __s.ll = __a.ll - __b.ll;                   \
 770     (sl) = __s.s.low; (sh) = __s.s.high;        \
 771   } while (0)
 772 # define udiv_qrnnd(q, r, nh, nl, d)            /* Unsigned two-word divide. \
          nl is preloaded into %0 (q) and nh into %1 (r, the "z" constraint \
          register) by the matching constraints; divu divides by d (%2), \
          after which %0 is read back as q and %1 as r.  Spelled __asm__ so \
          it also compiles when the plain asm keyword is disabled.  */ \
 773   __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 774 # define sdiv_qrnnd(q, r, nh, nl, d)            /* Signed two-word divide. \
          Same operand layout as udiv_qrnnd (nl preloaded into %0/q, nh into \
          %1/r, d as %2) but using div instead of divu.  Spelled __asm__ so \
          it also compiles when the plain asm keyword is disabled.  */ \
 775   __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 776 # define UMUL_TIME 3
 777 # define UDIV_TIME 38
 778 #endif
 779
 780 #if defined (__mips__) && W_TYPE_SIZE == 32
 781 #define umul_ppmm(w1, w0, u, v)                                         \
 782   do { /* Widen u to UDItype before multiplying so the full product is kept. */ \
 783     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
 784     (w1) = (USItype) (__x >> 32); /* high word */                       \
 785     (w0) = (USItype) (__x);       /* low word */                        \
 786   } while (0)
 787 #define UMUL_TIME 10
 788 #define UDIV_TIME 100
 789
 790 #if (__mips == 32 || __mips == 64) && ! __mips16
      /* Only when __mips is 32 or 64 and __mips16 is not set:
         count_leading_zeros defers to __builtin_clz.  */
 791 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 792 #define COUNT_LEADING_ZEROS_0 32
 793 #endif
 794 #endif /* __mips__ */
 795
 796 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 797 #define umul_ppmm(w1, w0, u, v) /* Double-word product of u and v. \
          u is preloaded into the UDItype operand %0 (__xx.__ll) by the \
          matching constraint and meid multiplies it by v (%2).  The result \
          is split through the union: __l is declared first, so the low \
          word is the first half of __ll in memory.  */ \
 798   ({union {UDItype __ll;                                                \
 799            struct {USItype __l, __h;} __i;                              \
 800           } __xx;                                                       \
 801   __asm__ ("meid %2,%0"                                                 \
 802            : "=g" (__xx.__ll)                                           \
 803            : "%0" ((USItype) (u)),                                      \
 804              "g" ((USItype) (v)));                                      \
 805   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 806 #define __umulsidi3(u, v) /* Same meid sequence as umul_ppmm, but the \
          statement expression yields the whole UDItype product __w instead \
          of splitting it into two words.  */ \
 807   ({UDItype __w;                                                        \
 808     __asm__ ("meid %2,%0"                                               \
 809              : "=g" (__w)                                               \
 810              : "%0" ((USItype) (u)),                                    \
 811                "g" ((USItype) (v)));                                    \
 812     __w; })
 813 #define udiv_qrnnd(q, r, n1, n0, d) /* Unsigned two-word divide. \
          n1:n0 is assembled into __xx.__ll through the union, deid divides \
          it in place by d (%2), and the result is read back with the \
          remainder in the __l half and the quotient in the __h half.  */ \
 814   ({union {UDItype __ll;                                                \
 815            struct {USItype __l, __h;} __i;                              \
 816           } __xx;                                                       \
 817   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 818   __asm__ ("deid %2,%0"                                                 \
 819            : "=g" (__xx.__ll)                                           \
 820            : "0" (__xx.__ll),                                           \
 821              "g" ((USItype) (d)));                                      \
 822   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 823 #define count_trailing_zeros(count,x) /* count (%0) is preloaded with \
          the constant 0 through the matching constraint; ffsd then scans x \
          (%2) and leaves its result in count.  */ \
 824   do {                                                                  \
 825     __asm__ ("ffsd     %2,%0"                                           \
 826             : "=r" ((USItype) (count))                                  \
 827             : "0" ((USItype) 0),                                        \
 828               "r" ((USItype) (x)));                                     \
 829   } while (0)
 830 #endif /* __ns32000__ */
 831
 832 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 833    system vendor compilers.
 834    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 835    enough, since that hits ARM and m68k too.  */
 836 #if (defined (_ARCH_PPC)        /* AIX */                               \
 837      || defined (_ARCH_PWR)     /* AIX */                               \
 838      || defined (_ARCH_COM)     /* AIX */                               \
 839      || defined (__powerpc__)   /* gcc */                               \
 840      || defined (__POWERPC__)   /* BEOS */                              \
 841      || defined (__ppc__)       /* Darwin */                            \
 842      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 843      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 844          && CPU_FAMILY == PPC)                                                \
 845      ) && W_TYPE_SIZE == 32
 846 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add. \
          __builtin_constant_p selects a shorter sequence when bh is the \
          compile-time constant 0 (addze) or all ones (addme); in those two \
          cases only ah is passed to the second instruction.  Otherwise \
          adde receives both ah and bh.  The low word is always formed \
          first from al and bl, and sl is earlyclobber because it is \
          written before the high-word inputs are read.  Each {x|y} pair \
          gives two alternative spellings of the same mnemonic.  */ \
 847   do {                                                                  \
 848     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 849       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 850              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 851     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 852       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 853              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 854     else                                                                \
 855       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 856              : "=r" (sh), "=&r" (sl)                                    \
 857              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 858   } while (0)
 859 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract. \
          __builtin_constant_p selects among five sequences: a constant ah \
          of 0 or all ones uses subfze or subfme on bh; a constant bh of 0 \
          or all ones uses addme or addze on ah; otherwise subfe receives \
          both bh and ah.  The low word is always formed first from al and \
          bl, and sl is earlyclobber because it is written before the \
          high-word inputs are read.  */ \
 860   do {                                                                  \
 861     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 862       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 863                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 864     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 865       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 866                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 867     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 868       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 869                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 870     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 871       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 872                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 873     else                                                                \
 874       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 875                : "=r" (sh), "=&r" (sl)                                  \
 876                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 877   } while (0)
 878 #define count_leading_zeros(count, x) /* cntlz/cntlzw of x into count.  */ \
 879   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 880 #define COUNT_LEADING_ZEROS_0 32
 881 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 882   || defined (__ppc__)                                                    \
 883   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 884   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 885          && CPU_FAMILY == PPC)
 886 #define umul_ppmm(ph, pl, m0, m1) /* Double-word unsigned product of m0 \
          and m1: mulhwu writes the high word to ph and the low word is the \
          truncating C product, stored in pl.  Both arguments are evaluated \
          exactly once: the asm reads the USItype temporaries rather than \
          the raw macro arguments, so side effects are not repeated and the \
          operands are always word-sized.  */ \
 887   do {                                                                  \
 888     USItype __m0 = (m0), __m1 = (m1);                                   \
 889     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 890     (pl) = __m0 * __m1;                                                 \
 891   } while (0)
 892 #define UMUL_TIME 15
 893 #define smul_ppmm(ph, pl, m0, m1) /* Double-word signed product of m0 \
          and m1: mulhw writes the high word to ph and pl receives the low \
          word.  Both arguments are evaluated exactly once, through the \
          SItype temporaries.  The low word is computed in USItype because \
          the low bits of the product are the same for signed and unsigned \
          operands, while a signed multiplication could overflow.  */ \
 894   do {                                                                  \
 895     SItype __m0 = (m0), __m1 = (m1);                                    \
 896     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 897     (pl) = (USItype) __m0 * (USItype) __m1;                             \
 898   } while (0)
 899 #define SMUL_TIME 14
 900 #define UDIV_TIME 120
 901 #elif defined (_ARCH_PWR)
 902 #define UMUL_TIME 8
 903 #define smul_ppmm(xh, xl, m0, m1) /* mul of m0 (%2) and m1 (%3): xh is \
          read from %0 and xl from the register selected by the "q" \
          constraint.  */ \
 904   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 905 #define SMUL_TIME 4
 906 #define sdiv_qrnnd(q, r, nh, nl, d) /* div of nh (%2) by d (%4): nl is \
          preloaded into the "q"-constraint register that also returns r, \
          and q is read from %0.  */ \
 907   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 908 #define UDIV_TIME 100
 909 #endif
 910 #endif /* 32-bit POWER architecture variants.  */
 911
 912 /* We should test _IBMR2 here when we add assembly support for the system
 913    vendor compilers.  */
 914 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 915 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add. \
          __builtin_constant_p selects a shorter sequence when bh is the \
          compile-time constant 0 (addze) or all ones of UDItype (addme); \
          in those two cases only ah is passed to the second instruction. \
          Otherwise adde receives both ah and bh.  The low word is always \
          formed first from al and bl, and sl is earlyclobber because it \
          is written before the high-word inputs are read.  */ \
 916   do {                                                                  \
 917     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 918       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 919              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 920     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 921       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 922              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 923     else                                                                \
 924       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 925              : "=r" (sh), "=&r" (sl)                                    \
 926              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 927   } while (0)
 928 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract. \
          __builtin_constant_p selects among five sequences: a constant ah \
          of 0 or all ones of UDItype uses subfze or subfme on bh; a \
          constant bh of 0 or all ones uses addme or addze on ah; otherwise \
          subfe receives both bh and ah.  The low word is always formed \
          first from al and bl, and sl is earlyclobber because it is \
          written before the high-word inputs are read.  */ \
 929   do {                                                                  \
 930     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 931       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 932                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 933     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 934       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 935                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 936     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 937       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 938                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 939     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 940       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 941                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 942     else                                                                \
 943       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 944                : "=r" (sh), "=&r" (sl)                                  \
 945                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 946   } while (0)
 947 #define count_leading_zeros(count, x) /* cntlzd of x into count.  */ \
 948   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 949 #define COUNT_LEADING_ZEROS_0 64
 950 #define umul_ppmm(ph, pl, m0, m1) /* Double-word unsigned product of m0 \
          and m1: mulhdu writes the high word to ph and the low word is the \
          truncating C product, stored in pl.  Both arguments are evaluated \
          exactly once: the asm reads the UDItype temporaries rather than \
          the raw macro arguments, so side effects are not repeated and the \
          operands are always word-sized.  */ \
 951   do {                                                                  \
 952     UDItype __m0 = (m0), __m1 = (m1);                                   \
 953     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 954     (pl) = __m0 * __m1;                                                 \
 955   } while (0)
 956 #define UMUL_TIME 15
 957 #define smul_ppmm(ph, pl, m0, m1) /* Double-word signed product of m0 \
          and m1: mulhd writes the high word to ph and pl receives the low \
          word.  Both arguments are evaluated exactly once, through the \
          DItype temporaries.  The low word is computed in UDItype because \
          the low bits of the product are the same for signed and unsigned \
          operands, while a signed multiplication could overflow.  */ \
 958   do {                                                                  \
 959     DItype __m0 = (m0), __m1 = (m1);                                    \
 960     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 961     (pl) = (UDItype) __m0 * (UDItype) __m1;                             \
 962   } while (0)
 963 #define SMUL_TIME 14  /* ??? */
 964 #define UDIV_TIME 120 /* ??? */
 965 #endif /* 64-bit PowerPC.  */
 966
 967 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 968 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add. \
          %1 (sl) is preloaded with al and %0 (sh) with ah through the \
          matching constraints; "a" adds bl (%5) into the low word and "ae" \
          then adds bh (%3) into the high word.  sl is earlyclobber because \
          it is written before bh is read.  */ \
 969   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 970            : "=r" ((USItype) (sh)),                                     \
 971              "=&r" ((USItype) (sl))                                     \
 972            : "%0" ((USItype) (ah)),                                     \
 973              "r" ((USItype) (bh)),                                      \
 974              "%1" ((USItype) (al)),                                     \
 975              "r" ((USItype) (bl)))
 976 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract. \
          %1 (sl) is preloaded with al and %0 (sh) with ah through the \
          matching constraints; "s" subtracts bl (%5) from the low word and \
          "se" then subtracts bh (%3) from the high word.  sl is \
          earlyclobber because it is written before bh is read.  */ \
 977   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 978            : "=r" ((USItype) (sh)),                                     \
 979              "=&r" ((USItype) (sl))                                     \
 980            : "0" ((USItype) (ah)),                                      \
 981              "r" ((USItype) (bh)),                                      \
 982              "1" ((USItype) (al)),                                      \
 983              "r" ((USItype) (bl)))
 984 #define umul_ppmm(ph, pl, m0, m1) /* Double-word product of m0 and m1. \
          The asm zeroes r2 (s r2,r2), moves m0 to r10 with mts, issues \
          sixteen "m r2,%3" steps against m1, then copies r2 to ph (cas) \
          and r10 to pl (mfs); r2 is listed as clobbered.  The trailing C \
          statement adds __m1 to ph when the top bit of __m0 is set and \
          __m0 when the top bit of __m1 is set (relying on an arithmetic \
          right shift of SItype).                                           \
          NOTE(review): that is the usual signed-to-unsigned high-word \
          correction, so the multiply steps are presumably signed -- \
          confirm.  */ \
 985   do {                                                                  \
 986     USItype __m0 = (m0), __m1 = (m1);                                   \
 987     __asm__ (                                                           \
 988        "s       r2,r2\n"                                                \
 989 "       mts     r10,%2\n"                                               \
 990 "       m       r2,%3\n"                                                \
 991 "       m       r2,%3\n"                                                \
 992 "       m       r2,%3\n"                                                \
 993 "       m       r2,%3\n"                                                \
 994 "       m       r2,%3\n"                                                \
 995 "       m       r2,%3\n"                                                \
 996 "       m       r2,%3\n"                                                \
 997 "       m       r2,%3\n"                                                \
 998 "       m       r2,%3\n"                                                \
 999 "       m       r2,%3\n"                                                \
1000 "       m       r2,%3\n"                                                \
1001 "       m       r2,%3\n"                                                \
1002 "       m       r2,%3\n"                                                \
1003 "       m       r2,%3\n"                                                \
1004 "       m       r2,%3\n"                                                \
1005 "       m       r2,%3\n"                                                \
1006 "       cas     %0,r2,r0\n"                                             \
1007 "       mfs     r10,%1"                                                 \
1008              : "=r" ((USItype) (ph)),                                   \
1009                "=r" ((USItype) (pl))                                    \
1010              : "%r" (__m0),                                             \
1011                 "r" (__m1)                                              \
1012              : "r2");                                                   \
1013     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1014              + (((SItype) __m1 >> 31) & __m0));                         \
1015   } while (0)
1016 #define UMUL_TIME 20
1017 #define UDIV_TIME 200
1018 #define count_leading_zeros(count, x) /* Two cases around a clz that \
          is applied to a half-word-sized value: for x >= 0x10000 clz is \
          run on x >> 16; otherwise it is run on x itself and 16 is added \
          to the result.  Note that x appears more than once in the \
          expansion.  */ \
1019   do {                                                                  \
1020     if ((x) >= 0x10000)                                                 \
1021       __asm__ ("clz     %0,%1"                                          \
1022                : "=r" ((USItype) (count))                               \
1023                : "r" ((USItype) (x) >> 16));                            \
1024     else                                                                \
1025       {                                                                 \
1026         __asm__ ("clz   %0,%1"                                          \
1027                  : "=r" ((USItype) (count))                             \
1028                  : "r" ((USItype) (x)));                                        \
1029         (count) += 16;                                                  \
1030       }                                                                 \
1031   } while (0)
1032 #endif
1033
1034 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1035 #ifndef __sh1__
1036 #define umul_ppmm(w1, w0, u, v) /* Double-word product of u and v. \
          dmulu.l multiplies u (%2) by v (%3); the two sts instructions \
          then copy macl to w0 (%1) and mach to w1 (%0).  Both macl and \
          mach are listed as clobbered.  */ \
1037   __asm__ (                                                             \
1038        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1039            : "=r<" ((USItype)(w1)),                                     \
1040              "=r<" ((USItype)(w0))                                      \
1041            : "r" ((USItype)(u)),                                        \
1042              "r" ((USItype)(v))                                         \
1043            : "macl", "mach")
1044 #define UMUL_TIME 5
1045 #endif
1046
1047 /* This is the same algorithm as __udiv_qrnnd_c.  */
1048 #define UDIV_NEEDS_NORMALIZATION 1
1049
1050 #define udiv_qrnnd(q, r, n1, n0, d) /* Two-word divide built on two \
          calls (jsr) to the hidden helper __udiv_qrnnd_16, whose address \
          is operand %5.  The asm loads d into r5, half-word-swapped \
          copies of n0 and d into r4 and r6, and finally combines r1 into \
          q (%0) with swap.w/or.  n1 is preloaded into r (%1, the "z" \
          constraint register, earlyclobber).  r1, r2, r4, r5, r6, pr and \
          t are listed as clobbered; the register roles are summarised in \
          the comment inside the block.  */ \
1051   do {                                                                  \
1052     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1053                         __attribute__ ((visibility ("hidden")));        \
1054     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1055     __asm__ (                                                           \
1056         "mov%M4 %4,r5\n"                                                \
1057 "       swap.w %3,r4\n"                                                 \
1058 "       swap.w r5,r6\n"                                                 \
1059 "       jsr @%5\n"                                                      \
1060 "       shll16 r6\n"                                                    \
1061 "       swap.w r4,r4\n"                                                 \
1062 "       jsr @%5\n"                                                      \
1063 "       swap.w r1,%0\n"                                                 \
1064 "       or r1,%0"                                                       \
1065         : "=r" (q), "=&z" (r)                                           \
1066         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1067         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1068   } while (0)
1069
1070 #define UDIV_TIME 80
1071
/* SH sub_ddmmss: ah and al are tied to the output registers of sh and sl
   (constraints "0" and "1").  After "clrt", bl (%5) is subtracted from
   the low word and bh (%4) from the high word with "subc", in place.
   The t bit is listed as clobbered.  Same contract as the generic C
   sub_ddmmss in this file.  */
1072 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1073   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1074            : "=r" (sh), "=r" (sl)                                       \
1075            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1076
1077 #endif /* __sh__ */
1078
1079 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening unsigned multiply: converts both operands to USItype and
   returns their full product as a UDItype.  Both macro parameters are
   parenthesized so that expression arguments such as `a + b' are
   converted and multiplied as a whole instead of being split by operator
   precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* SHmedia count_leading_zeros: x is truncated to USItype and widened to
   a UDItype temporary, "nsb" is run on it, and 31 is subtracted from the
   result before it is stored in count.
   NOTE(review): the -31 adjustment presumably accounts for the 32 zero
   upper bits of the widened value -- confirm against the "nsb"
   definition.  COUNT_LEADING_ZEROS_0 states that the macro yields 32 for
   x == 0.  */
1081 #define count_leading_zeros(count, x) \
1082   do                                                                    \
1083     {                                                                   \
1084       UDItype x_ = (USItype)(x);                                        \
1085       SItype c_;                                                        \
1086                                                                         \
1087       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1088       (count) = c_ - 31;                                                \
1089     }                                                                   \
1090   while (0)
1091 #define COUNT_LEADING_ZEROS_0 32
1092 #endif
1093
1094 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1095     && W_TYPE_SIZE == 32
/* 32-bit SPARC add_ssaaaa: "addcc" adds al (%4) and bl (%5) into sl (%1),
   then "addx" adds ah (%2) and bh (%3) into sh (%0).  sl is early-clobber
   because it is written before ah and bh are read.  The "%" in the ah/al
   constraints marks each pair of addends as commutative.  __CLOBBER_CC
   (defined elsewhere in this file) supplies the clobber list.  Same
   contract as the generic C add_ssaaaa in this file.  */
1096 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1097   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1098            : "=r" ((USItype) (sh)),                                     \
1099              "=&r" ((USItype) (sl))                                     \
1100            : "%rJ" ((USItype) (ah)),                                    \
1101              "rI" ((USItype) (bh)),                                     \
1102              "%rJ" ((USItype) (al)),                                    \
1103              "rI" ((USItype) (bl))                                      \
1104            __CLOBBER_CC)
/* 32-bit SPARC sub_ddmmss: "subcc" computes al (%4) - bl (%5) into sl
   (%1), then "subx" computes ah (%2) - bh (%3) into sh (%0).  Unlike
   add_ssaaaa the operand constraints carry no "%", since subtraction is
   not commutative.  __CLOBBER_CC (defined elsewhere in this file)
   supplies the clobber list.  */
1105 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1106   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1107            : "=r" ((USItype) (sh)),                                     \
1108              "=&r" ((USItype) (sl))                                     \
1109            : "rJ" ((USItype) (ah)),                                     \
1110              "rI" ((USItype) (bh)),                                     \
1111              "rJ" ((USItype) (al)),                                     \
1112              "rI" ((USItype) (bl))                                      \
1113            __CLOBBER_CC)
1114 #if defined (__sparc_v8__)
/* SPARC v8 umul_ppmm: "umul" of u (%2) and v (%3) writes w0 (%1); w1 (%0)
   is then read from the %y register with "rd".  No clobber list is
   given.  */
1115 #define umul_ppmm(w1, w0, u, v) \
1116   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1117            : "=r" ((USItype) (w1)),                                     \
1118              "=r" ((USItype) (w0))                                      \
1119            : "r" ((USItype) (u)),                                       \
1120              "r" ((USItype) (v)))
/* SPARC v8 udiv_qrnnd: moves __n1 (%2) into %y, waits three "nop"s, then
   "udiv" of __n0 (%3) by __d (%4) produces __q (%0).  The remainder __r
   (%1) is recomputed as __n0 - __q * __d using "umul" and "sub".  Both
   outputs are early-clobber because they are written while inputs are
   still needed.  */
1121 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1122   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1123            : "=&r" ((USItype) (__q)),                                   \
1124              "=&r" ((USItype) (__r))                                    \
1125            : "r" ((USItype) (__n1)),                                    \
1126              "r" ((USItype) (__n0)),                                    \
1127              "r" ((USItype) (__d)))
1128 #else
1129 #if defined (__sparclite__)
1130 /* This has hardware multiply but not divide.  It also has two additional
1131    instructions scan (ffs from high bit) and divscc.  */
/* sparclite umul_ppmm: same asm as the v8 variant -- "umul" of u (%2) and
   v (%3) writes w0 (%1), and w1 (%0) is read from %y.  */
1132 #define umul_ppmm(w1, w0, u, v) \
1133   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1134            : "=r" ((USItype) (w1)),                                     \
1135              "=r" ((USItype) (w0))                                      \
1136            : "r" ((USItype) (u)),                                       \
1137              "r" ((USItype) (v)))
/* sparclite udiv_qrnnd, fully unrolled: n1 (%2) is written to %y, then 32
   "divscc" steps are chained through %g1, starting from n0 (%3) and each
   using d (%4).  The last step writes q (%0).  r (%1) is read from %y
   and, if the "bl,a" branch is taken, d is added to it by the
   instruction that follows the branch.  %g1 and the condition codes are
   clobbered (__AND_CLOBBER_CC is defined elsewhere in this file).
   UDIV_TIME is the cost estimate for this sequence.  */
1138 #define udiv_qrnnd(q, r, n1, n0, d) \
1139   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1140 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1141 "       tst     %%g0\n"                                                 \
1142 "       divscc  %3,%4,%%g1\n"                                           \
1143 "       divscc  %%g1,%4,%%g1\n"                                         \
1144 "       divscc  %%g1,%4,%%g1\n"                                         \
1145 "       divscc  %%g1,%4,%%g1\n"                                         \
1146 "       divscc  %%g1,%4,%%g1\n"                                         \
1147 "       divscc  %%g1,%4,%%g1\n"                                         \
1148 "       divscc  %%g1,%4,%%g1\n"                                         \
1149 "       divscc  %%g1,%4,%%g1\n"                                         \
1150 "       divscc  %%g1,%4,%%g1\n"                                         \
1151 "       divscc  %%g1,%4,%%g1\n"                                         \
1152 "       divscc  %%g1,%4,%%g1\n"                                         \
1153 "       divscc  %%g1,%4,%%g1\n"                                         \
1154 "       divscc  %%g1,%4,%%g1\n"                                         \
1155 "       divscc  %%g1,%4,%%g1\n"                                         \
1156 "       divscc  %%g1,%4,%%g1\n"                                         \
1157 "       divscc  %%g1,%4,%%g1\n"                                         \
1158 "       divscc  %%g1,%4,%%g1\n"                                         \
1159 "       divscc  %%g1,%4,%%g1\n"                                         \
1160 "       divscc  %%g1,%4,%%g1\n"                                         \
1161 "       divscc  %%g1,%4,%%g1\n"                                         \
1162 "       divscc  %%g1,%4,%%g1\n"                                         \
1163 "       divscc  %%g1,%4,%%g1\n"                                         \
1164 "       divscc  %%g1,%4,%%g1\n"                                         \
1165 "       divscc  %%g1,%4,%%g1\n"                                         \
1166 "       divscc  %%g1,%4,%%g1\n"                                         \
1167 "       divscc  %%g1,%4,%%g1\n"                                         \
1168 "       divscc  %%g1,%4,%%g1\n"                                         \
1169 "       divscc  %%g1,%4,%%g1\n"                                         \
1170 "       divscc  %%g1,%4,%%g1\n"                                         \
1171 "       divscc  %%g1,%4,%%g1\n"                                         \
1172 "       divscc  %%g1,%4,%%g1\n"                                         \
1173 "       divscc  %%g1,%4,%0\n"                                           \
1174 "       rd      %%y,%1\n"                                               \
1175 "       bl,a 1f\n"                                                      \
1176 "       add     %1,%4,%1\n"                                             \
1177 "1:     ! End of inline udiv_qrnnd"                                     \
1178            : "=r" ((USItype) (q)),                                      \
1179              "=r" ((USItype) (r))                                       \
1180            : "r" ((USItype) (n1)),                                      \
1181              "r" ((USItype) (n0)),                                      \
1182              "rI" ((USItype) (d))                                       \
1183            : "g1" __AND_CLOBBER_CC)
1184 #define UDIV_TIME 37
/* sparclite count_leading_zeros: a single "scan %1,1,%0" instruction with
   x as the register input and count as the register output.  */
1185 #define count_leading_zeros(count, x) \
1186   do {                                                                  \
1187   __asm__ ("scan %1,1,%0"                                               \
1188            : "=r" ((USItype) (count))                                   \
1189            : "r" ((USItype) (x)));                                      \
1190   } while (0)
1191 /* Early sparclites return 63 for an argument of 0, but they warn that future
1192    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1193    undefined.  */
1194 #else
1195 /* SPARC without integer multiplication and divide instructions.
1196    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm built from multiply-step instructions: u (%2) is written to
   %y, %o5 is set to (v >> 31, arithmetic) & u, and %g1 is cleared by
   "andcc".  32 "mulscc" steps with v (%3) follow, plus a final "mulscc"
   with 0.  w1 (%0) is %g1 + %o5 and w0 (%1) is read from %y.  The
   instructions marked "Don't move this insn" must stay in place after
   the "wr".  %g1, %o5 and the condition codes are clobbered.
   NOTE(review): %o5 presumably corrects a signed multiply-step result
   to the unsigned product -- confirm.  */
1197 #define umul_ppmm(w1, w0, u, v) \
1198   __asm__ ("! Inlined umul_ppmm\n"                                      \
1199 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1200 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1201 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1202 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1203 "       mulscc  %%g1,%3,%%g1\n"                                         \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,%3,%%g1\n"                                         \
1214 "       mulscc  %%g1,%3,%%g1\n"                                         \
1215 "       mulscc  %%g1,%3,%%g1\n"                                         \
1216 "       mulscc  %%g1,%3,%%g1\n"                                         \
1217 "       mulscc  %%g1,%3,%%g1\n"                                         \
1218 "       mulscc  %%g1,%3,%%g1\n"                                         \
1219 "       mulscc  %%g1,%3,%%g1\n"                                         \
1220 "       mulscc  %%g1,%3,%%g1\n"                                         \
1221 "       mulscc  %%g1,%3,%%g1\n"                                         \
1222 "       mulscc  %%g1,%3,%%g1\n"                                         \
1223 "       mulscc  %%g1,%3,%%g1\n"                                         \
1224 "       mulscc  %%g1,%3,%%g1\n"                                         \
1225 "       mulscc  %%g1,%3,%%g1\n"                                         \
1226 "       mulscc  %%g1,%3,%%g1\n"                                         \
1227 "       mulscc  %%g1,%3,%%g1\n"                                         \
1228 "       mulscc  %%g1,%3,%%g1\n"                                         \
1229 "       mulscc  %%g1,%3,%%g1\n"                                         \
1230 "       mulscc  %%g1,%3,%%g1\n"                                         \
1231 "       mulscc  %%g1,%3,%%g1\n"                                         \
1232 "       mulscc  %%g1,%3,%%g1\n"                                         \
1233 "       mulscc  %%g1,%3,%%g1\n"                                         \
1234 "       mulscc  %%g1,%3,%%g1\n"                                         \
1235 "       mulscc  %%g1,0,%%g1\n"                                          \
1236 "       add     %%g1,%%o5,%0\n"                                         \
1237 "       rd      %%y,%1"                                                 \
1238            : "=r" ((USItype) (w1)),                                     \
1239              "=r" ((USItype) (w0))                                      \
1240            : "%rI" ((USItype) (u)),                                     \
1241              "r" ((USItype) (v))                                                \
1242            : "g1", "o5" __AND_CLOBBER_CC)
1243 #define UMUL_TIME 39            /* 39 instructions */
1244 /* It's quite necessary to add this much assembler for the sparc.
1245    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Operand layout: __q (%0) starts out holding __n0 (constraint "0"),
   __r (%1) starts out holding __n1 (constraint "1"), and __d is %2.
   %g1 is loaded with 32 and decremented once per iteration.  Each
   iteration shifts a quotient bit into %0 and updates %1; the final
   "xnor %0,0,%0" complements the accumulated bits to give the quotient.
   Several instructions are indented by one extra space; they follow a
   branch and are presumably its delay slot, so statement order must be
   preserved.  %g1 and the condition codes are clobbered.  */
1246 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1247   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1248 "       mov     32,%%g1\n"                                              \
1249 "       subcc   %1,%2,%%g0\n"                                           \
1250 "1:     bcs     5f\n"                                                   \
1251 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1252 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1253 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1254 "       subcc   %%g1,1,%%g1\n"                                          \
1255 "2:     bne     1b\n"                                                   \
1256 "        subcc  %1,%2,%%g0\n"                                           \
1257 "       bcs     3f\n"                                                   \
1258 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1259 "       b       3f\n"                                                   \
1260 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1261 "4:     sub     %1,%2,%1\n"                                             \
1262 "5:     addxcc  %1,%1,%1\n"                                             \
1263 "       bcc     2b\n"                                                   \
1264 "        subcc  %%g1,1,%%g1\n"                                          \
1265 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1266 "       bne     4b\n"                                                   \
1267 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1268 "       sub     %1,%2,%1\n"                                             \
1269 "3:     xnor    %0,0,%0\n"                                              \
1270 "       ! End of inline udiv_qrnnd"                                     \
1271            : "=&r" ((USItype) (__q)),                                   \
1272              "=&r" ((USItype) (__r))                                    \
1273            : "r" ((USItype) (__d)),                                     \
1274              "1" ((USItype) (__n1)),                                    \
1275              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1276 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1277 #endif /* __sparclite__ */
1278 #endif /* __sparc_v8__ */
1279 #endif /* sparc32 */
1280
1281 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1282     && W_TYPE_SIZE == 64
/* 64-bit SPARC add_ssaaaa: "addcc" adds al (%4) and bl (%5) into sl (%1),
   a plain "add" adds ah (%2) and bh (%3) into sh (%0), and the annulled
   branch "bcs,a,pn %xcc" lets the following "add %0, 1, %0" take effect
   only when the low-word addition carried.  __CLOBBER_CC (defined
   elsewhere in this file) supplies the clobber list.  */
1283 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1284   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1285            "add %r2,%3,%0\n\t"                                          \
1286            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1287            "add %0, 1, %0\n"                                            \
1288            "1:"                                                         \
1289            : "=r" ((UDItype)(sh)),                                      \
1290              "=&r" ((UDItype)(sl))                                      \
1291            : "%rJ" ((UDItype)(ah)),                                     \
1292              "rI" ((UDItype)(bh)),                                      \
1293              "%rJ" ((UDItype)(al)),                                     \
1294              "rI" ((UDItype)(bl))                                       \
1295            __CLOBBER_CC)
1296
/* 64-bit SPARC sub_ddmmss: "subcc" computes al (%4) - bl (%5) into sl
   (%1), "sub" computes ah (%2) - bh (%3) into sh (%0), and the annulled
   branch "bcs,a,pn %xcc" lets the following "sub %0, 1, %0" take effect
   only when the low-word subtraction borrowed.  __CLOBBER_CC (defined
   elsewhere in this file) supplies the clobber list.  */
1297 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1298   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1299            "sub %r2,%3,%0\n\t"                                          \
1300            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1301            "sub %0, 1, %0\n\t"                                          \
1302            "1:"                                                         \
1303            : "=r" ((UDItype)(sh)),                                      \
1304              "=&r" ((UDItype)(sl))                                      \
1305            : "rJ" ((UDItype)(ah)),                                      \
1306              "rI" ((UDItype)(bh)),                                      \
1307              "rJ" ((UDItype)(al)),                                      \
1308              "rI" ((UDItype)(bl))                                       \
1309            __CLOBBER_CC)
1310
/* 64-bit SPARC umul_ppmm: wh is %0, wl is %1, tmp1..tmp4 are %2..%5 (all
   early-clobber scratch registers), u is %6 and v is %7.  The asm splits
   u and v into 32-bit halves ("srl ...,0" and "srlx ...,32"), forms
   partial products with four "mulx" instructions and combines them with
   shifts and carry-setting adds; "movcc %xcc" selects whether the value
   built in %2 is added into wh.  The statement is marked __volatile__.
   UMUL_TIME and UDIV_TIME are the cost estimates for this
   configuration.  */
1311 #define umul_ppmm(wh, wl, u, v)                                         \
1312   do {                                                                  \
1313           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1314           __asm__ __volatile__ (                                        \
1315                    "srl %7,0,%3\n\t"                                    \
1316                    "mulx %3,%6,%1\n\t"                                  \
1317                    "srlx %6,32,%2\n\t"                                  \
1318                    "mulx %2,%3,%4\n\t"                                  \
1319                    "sllx %4,32,%5\n\t"                                  \
1320                    "srl %6,0,%3\n\t"                                    \
1321                    "sub %1,%5,%5\n\t"                                   \
1322                    "srlx %5,32,%5\n\t"                                  \
1323                    "addcc %4,%5,%4\n\t"                                 \
1324                    "srlx %7,32,%5\n\t"                                  \
1325                    "mulx %3,%5,%3\n\t"                                  \
1326                    "mulx %2,%5,%5\n\t"                                  \
1327                    "sethi %%hi(0x80000000),%2\n\t"                      \
1328                    "addcc %4,%3,%4\n\t"                                 \
1329                    "srlx %4,32,%4\n\t"                                  \
1330                    "add %2,%2,%2\n\t"                                   \
1331                    "movcc %%xcc,%%g0,%2\n\t"                            \
1332                    "addcc %5,%4,%5\n\t"                                 \
1333                    "sllx %3,32,%3\n\t"                                  \
1334                    "add %1,%3,%1\n\t"                                   \
1335                    "add %5,%2,%0"                                       \
1336            : "=r" ((UDItype)(wh)),                                      \
1337              "=&r" ((UDItype)(wl)),                                     \
1338              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1339            : "r" ((UDItype)(u)),                                        \
1340              "r" ((UDItype)(v))                                         \
1341            __CLOBBER_CC);                                               \
1342   } while (0)
1343 #define UMUL_TIME 96
1344 #define UDIV_TIME 230
1345 #endif /* sparc64 */
1346
1347 #if defined (__vax__) && W_TYPE_SIZE == 32
/* VAX add_ssaaaa: ah and al are tied to the outputs sh and sl
   (constraints "%0" and "%1").  "addl2" adds bl (%5) into the low word
   and "adwc" adds bh (%3) into the high word, both in place.  No clobber
   list is given.  */
1348 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1349   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1350            : "=g" ((USItype) (sh)),                                     \
1351              "=&g" ((USItype) (sl))                                     \
1352            : "%0" ((USItype) (ah)),                                     \
1353              "g" ((USItype) (bh)),                                      \
1354              "%1" ((USItype) (al)),                                     \
1355              "g" ((USItype) (bl)))
/* VAX sub_ddmmss: ah and al are tied to the outputs sh and sl
   (constraints "0" and "1").  "subl2" subtracts bl (%5) from the low
   word and "sbwc" subtracts bh (%3) from the high word, both in
   place.  */
1356 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1357   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1358            : "=g" ((USItype) (sh)),                                     \
1359              "=&g" ((USItype) (sl))                                     \
1360            : "0" ((USItype) (ah)),                                      \
1361              "g" ((USItype) (bh)),                                      \
1362              "1" ((USItype) (al)),                                      \
1363              "g" ((USItype) (bl)))
/* VAX umul_ppmm: m0 and m1 are copied into USItype temporaries, "emul"
   writes a 64-bit result into the union, and xh / xl are taken from its
   __h / __l members.  xh is then increased by __m1 when __m0 has its top
   bit set and by __m0 when __m1 has its top bit set.
   NOTE(review): that adjustment presumably converts a signed "emul"
   product into the unsigned one -- confirm against the VAX manual.  */
1364 #define umul_ppmm(xh, xl, m0, m1) \
1365   do {                                                                  \
1366     union {                                                             \
1367         UDItype __ll;                                                   \
1368         struct {USItype __l, __h;} __i;                                 \
1369       } __xx;                                                           \
1370     USItype __m0 = (m0), __m1 = (m1);                                   \
1371     __asm__ ("emul %1,%2,$0,%0"                                         \
1372              : "=r" (__xx.__ll)                                         \
1373              : "g" (__m0),                                              \
1374                "g" (__m1));                                             \
1375     (xh) = __xx.__i.__h;                                                \
1376     (xl) = __xx.__i.__l;                                                \
1377     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1378              + (((SItype) __m1 >> 31) & __m0));                         \
1379   } while (0)
/* VAX sdiv_qrnnd: n1 and n0 are stored into the __h and __l members of a
   DItype union, and "ediv" is issued with d (%3) and that 64-bit value
   (%2) as inputs and q (%0) and r (%1) as outputs.  The union members
   and DItype are signed types.  */
1380 #define sdiv_qrnnd(q, r, n1, n0, d) \
1381   do {                                                                  \
1382     union {DItype __ll;                                                 \
1383            struct {SItype __l, __h;} __i;                               \
1384           } __xx;                                                       \
1385     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1386     __asm__ ("ediv %3,%2,%0,%1"                                         \
1387              : "=g" (q), "=g" (r)                                       \
1388              : "g" (__xx.__ll), "g" (d));                               \
1389   } while (0)
1390 #endif /* __vax__ */
1391
1392 #ifdef _TMS320C6X
/* TMS320C6X add_ssaaaa: "addu" adds al and bl into the 64-bit temporary
   __ll.  sl receives its low 32 bits, and sh receives the bits above
   bit 31 (the carry out of the low-word addition) plus ah plus bh.  */
1393 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1394   do                                                                    \
1395     {                                                                   \
1396       UDItype __ll;                                                     \
1397       __asm__ ("addu .l1 %1, %2, %0"                                    \
1398                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1399       (sl) = (USItype)__ll;                                             \
1400       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1401     }                                                                   \
1402   while (0)
1403
1404 #ifdef _TMS320C6400_PLUS
/* Widening unsigned multiply: converts both operands to USItype and
   returns their full product as a UDItype.  Both macro parameters are
   parenthesized so that expression arguments such as `a + b' are
   converted and multiplied as a whole instead of being split by operator
   precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* umul_ppmm for C64x+: form the full 64-bit product of the two 32-bit
   operands in C, then hand back its upper word in w1 and its lower word
   in w0.  Each of u and v is evaluated exactly once.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    const UDItype __umul_prod = (UDItype) (USItype) (u) * (USItype) (v);\
    (w1) = (USItype) (__umul_prod >> 32);                               \
    (w0) = (USItype) (__umul_prod);                                     \
  } while (0)
1412 #endif  /* _TMS320C6400_PLUS */
1413
/* Bit counting via compiler builtins: count_leading_zeros stores
   __builtin_clz (x) in count.  count_trailing_zeros, available only when
   _TMS320C6400 is defined, stores __builtin_ctz (x).  Both macros are
   expressions, not statements.  UMUL_TIME and UDIV_TIME are the cost
   estimates for this target.  */
1414 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1415 #ifdef _TMS320C6400
1416 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1417 #endif
1418 #define UMUL_TIME 4
1419 #define UDIV_TIME 40
1420 #endif /* _TMS320C6X */
1421
1422 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1423 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1424    to expand builtin functions depending on what configuration features
1425    are available.  This avoids library calls when the operation can be
1426    performed in-line.  */
/* Xtensa umul_ppmm: let the compiler expand __builtin_umulsidi3 for the
   64-bit product of u and v, then split it through a DWunion into the
   high word (w1) and the low word (w0).  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __umul_res;                                                 \
    __umul_res.ll = __builtin_umulsidi3 (u, v);                         \
    (w1) = __umul_res.s.high;                                           \
    (w0) = __umul_res.s.low;                                            \
  } while (0)
/* Thin wrappers around compiler builtins: __umulsidi3 forwards to
   __builtin_umulsidi3, count_leading_zeros stores __builtin_clz (X) in
   COUNT and count_trailing_zeros stores __builtin_ctz (X) in COUNT.  */
1434 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1435 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1436 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1437 #endif /* __xtensa__ */
1438
1439 #if defined xstormy16
/* Declares a helper taking and returning a UHItype.
   NOTE(review): the count_leading_zeros macro that follows calls
   __clzhi2 rather than this function -- confirm whether this declaration
   is still needed.  */
1440 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* xstormy16 count_leading_zeros: scan x in 16-bit pieces from the most
   significant piece down, adding __clzhi2 of each piece to count and
   stopping at the first piece that is not entirely zero (result != 16).
   For x == 0 every piece contributes 16, so the result is W_TYPE_SIZE,
   as COUNT_LEADING_ZEROS_0 states.

   The macro-local variables use reserved-style names, like the other
   macros in this file, so that they cannot capture a caller's `count' or
   `x' argument that happens to be called `size' or `c'.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_c;                                              \
                                                                        \
          __clz_c = __clzhi2 ((x) >> (__clz_size - 16));                \
          (count) += __clz_c;                                           \
          if (__clz_c != 16)                                            \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1459 #endif
1460
1461 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Z8000 (16-bit word) add_ssaaaa: ah and al are tied to the outputs sh
   and sl (constraints "%0" and "%1").  "add" adds bl (%5) into the low
   word and "adc" adds bh (%3) into the high word, both in place.  */
1462 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1463   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1464            : "=r" ((unsigned int)(sh)),                                 \
1465              "=&r" ((unsigned int)(sl))                                 \
1466            : "%0" ((unsigned int)(ah)),                                 \
1467              "r" ((unsigned int)(bh)),                                  \
1468              "%1" ((unsigned int)(al)),                                 \
1469              "rQR" ((unsigned int)(bl)))
/* Z8000 (16-bit word) sub_ddmmss: ah and al are tied to the outputs sh
   and sl (constraints "0" and "1").  "sub" subtracts bl (%5) from the
   low word and "sbc" subtracts bh (%3) from the high word, both in
   place.  */
1470 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1471   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1472            : "=r" ((unsigned int)(sh)),                                 \
1473              "=&r" ((unsigned int)(sl))                                 \
1474            : "0" ((unsigned int)(ah)),                                  \
1475              "r" ((unsigned int)(bh)),                                  \
1476              "1" ((unsigned int)(al)),                                  \
1477              "rQR" ((unsigned int)(bl)))
/* Z8000 (16-bit word) umul_ppmm: m0 and m1 are copied into temporaries;
   __m0 is tied to the __l output (constraint "%1") and "mult" is issued
   with __m1 as its other operand.  xh / xl are taken from the __h / __l
   union members.  xh is then increased by __m1 when bit 15 of __m0 is
   set and by __m0 when bit 15 of __m1 is set.
   NOTE(review): that adjustment presumably converts a signed "mult"
   product into the unsigned one -- confirm against the Z8000 manual.  */
1478 #define umul_ppmm(xh, xl, m0, m1) \
1479   do {                                                                  \
1480     union {long int __ll;                                               \
1481            struct {unsigned int __h, __l;} __i;                         \
1482           } __xx;                                                       \
1483     unsigned int __m0 = (m0), __m1 = (m1);                              \
1484     __asm__ ("mult      %S0,%H3"                                        \
1485              : "=r" (__xx.__i.__h),                                     \
1486                "=r" (__xx.__i.__l)                                      \
1487              : "%1" (__m0),                                             \
1488                "rQR" (__m1));                                           \
1489     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1490     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1491              + (((signed int) __m1 >> 15) & __m0));                     \
1492   } while (0)
1493 #endif /* __z8000__ */
1494
1495 #endif /* __GNUC__ */
1496
1497 /* If this machine has no inline assembler, use C macros.  */
1498
1499 #if !defined (add_ssaaaa)
/* Portable add_ssaaaa: (sh,sl) = (ah,al) + (bh,bl) on two-word unsigned
   numbers.  The low words are summed first; the sum wrapped around
   exactly when it is smaller than al, and that comparison result (0 or
   1) is the carry added into the high word.  sl is written last so that
   al may still be read for the carry test.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __add_lo = (al) + (bl);                                      \
    (sh) = (ah) + (bh) + (__add_lo < (al));                             \
    (sl) = __add_lo;                                                    \
  } while (0)
1507 #endif
1508
1509 #if !defined (sub_ddmmss)
/* Portable sub_ddmmss: (sh,sl) = (ah,al) - (bh,bl) on two-word unsigned
   numbers.  The low words are subtracted first; the difference wrapped
   around exactly when it is larger than al, and that comparison result
   (0 or 1) is the borrow taken from the high word.  sl is written last
   so that al may still be read for the borrow test.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_lo = (al) - (bl);                                      \
    (sh) = (ah) - (bh) - (__sub_lo > (al));                             \
    (sl) = __sub_lo;                                                    \
  } while (0)
1517 #endif
1518
1519 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1520    smul_ppmm.  */
1521 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* Unsigned two-word product derived from the signed smul_ppmm.  The
   operands are captured once, smul_ppmm delivers the signed high word
   and writes w0 directly, and the high word is then corrected: for each
   operand whose top bit is set, the other operand is added to it.  The
   expression -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask when the top
   bit of x is set and zero otherwise.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __umul_shi;                                                  \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    smul_ppmm (__umul_shi, w0, __umul_u, __umul_v);                     \
    (w1) = __umul_shi                                                   \
           + (-(__umul_u >> (W_TYPE_SIZE - 1)) & __umul_v)              \
           + (-(__umul_v >> (W_TYPE_SIZE - 1)) & __umul_u);             \
  } while (0)
1530 #endif
1531
1532 /* If we still don't have umul_ppmm, define it using plain C.  */
1533 #if !defined (umul_ppmm)
/* Plain C umul_ppmm: schoolbook multiplication on half words.  Each
   operand is split with __ll_lowpart / __ll_highpart, the four partial
   products are formed in full words, and the two middle products are
   folded together with explicit carry propagation.  w1 receives the
   high word of the product and w0 the low word.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __pp_ul = __ll_lowpart (u);                                 \
    UHWtype __pp_uh = __ll_highpart (u);                                \
    UHWtype __pp_vl = __ll_lowpart (v);                                 \
    UHWtype __pp_vh = __ll_highpart (v);                                \
                                                                        \
    UWtype __pp_ll = (UWtype) __pp_ul * __pp_vl;                        \
    UWtype __pp_lh = (UWtype) __pp_ul * __pp_vh;                        \
    UWtype __pp_hl = (UWtype) __pp_uh * __pp_vl;                        \
    UWtype __pp_hh = (UWtype) __pp_uh * __pp_vh;                        \
                                                                        \
    /* Adding the upper half of the low product cannot overflow.  */    \
    __pp_lh += __ll_highpart (__pp_ll);                                 \
    /* Adding the other middle product can; detect the wrap-around.  */ \
    __pp_lh += __pp_hl;                                                 \
    if (__pp_lh < __pp_hl)                                              \
      /* The carry has the weight of __ll_B in the high word.  */       \
      __pp_hh += __ll_B;                                                \
                                                                        \
    (w1) = __pp_hh + __ll_highpart (__pp_lh);                           \
    (w0) = __ll_lowpart (__pp_lh) * __ll_B + __ll_lowpart (__pp_ll);    \
  } while (0)
1557 #endif
1558
/* Fallback __umulsidi3 (u, v): let umul_ppmm write the two result words
   into the high and low halves of a DWunion, and yield the union's
   combined member as the value of the statement expression.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __umulsidi3_w;                                              \
    umul_ppmm (__umulsidi3_w.s.high, __umulsidi3_w.s.low, u, v);        \
    __umulsidi3_w.ll;                                                   \
  })
#endif
1565
/* Define this unconditionally, so it can be used for debugging.
   __udiv_qrnnd_c (q, r, n1, n0, d) divides the double-word number
   <n1,n0> by the word D using only word-sized C arithmetic, producing
   the quotient one half-word at a time.  The quotient is stored in Q
   and the remainder in R.  The code divides by the high half of D, so
   that half must be non-zero.  NOTE(review): callers are presumably
   required to pass N1 < D and a D whose most significant bit is set
   (cf. UDIV_NEEDS_NORMALIZATION) -- confirm against the file's header
   documentation.  N1, N0 and D are read into locals first so that an
   argument with side effects is evaluated only once.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __udiv_n1 = (n1), __udiv_n0 = (n0), __udiv_d = (d);          \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (__udiv_d);                                    \
    __d0 = __ll_lowpart (__udiv_d);                                     \
                                                                        \
    /* Upper quotient half: estimate, then correct at most twice.  */   \
    __r1 = __udiv_n1 % __d1;                                            \
    __q1 = __udiv_n1 / __d1;                                            \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0);                   \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __udiv_d;                                       \
        if (__r1 >= __udiv_d) /* i.e. the addition did not carry */     \
          if (__r1 < __m)                                               \
            __q1--, __r1 += __udiv_d;                                   \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Lower quotient half, same scheme on the running remainder.  */   \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0);                    \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __udiv_d;                                       \
        if (__r0 >= __udiv_d)                                           \
          if (__r0 < __m)                                               \
            __q0--, __r0 += __udiv_d;                                   \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1603
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The quotient it
   returns is stored in Q; the remainder it writes through its first
   argument is stored in R.  The remainder temporary is a full UWtype
   word, not a fixed 32-bit type, so that the pointer passed to
   __udiv_w_sdiv has the word type for every W_TYPE_SIZE.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1614
/* Still no udiv_qrnnd: make it an alias for the portable
   __udiv_qrnnd_c, and record that choice by setting
   UDIV_NEEDS_NORMALIZATION to 1.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1620
/* Portable count_leading_zeros (count, x).  A shift amount is chosen
   (one of four quarter-word steps when W_TYPE_SIZE <= 32, otherwise by
   walking down in 8-bit steps until a non-zero byte is met), X is
   shifted right by it, and __clz_tab is indexed with the result.
   COUNT is set to W_TYPE_SIZE minus the sum of that table entry and
   the shift amount.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_x = (x);                                               \
    UWtype __clz_a;                                                     \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        /* Find the topmost non-zero byte, a byte at a time.  */        \
        __clz_a = W_TYPE_SIZE - 8;                                      \
        while (__clz_a > 0 && ((__clz_x >> __clz_a) & 0xff) == 0)       \
          __clz_a -= 8;                                                 \
      }                                                                 \
    else if (__clz_x < ((UWtype) 1 << __BITS4))                         \
      __clz_a = 0;                                                      \
    else if (__clz_x < ((UWtype) 1 << 2 * __BITS4))                     \
      __clz_a = __BITS4;                                                \
    else if (__clz_x < ((UWtype) 1 << 3 * __BITS4))                     \
      __clz_a = 2 * __BITS4;                                            \
    else                                                                \
      __clz_a = 3 * __BITS4;                                            \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_x >> __clz_a] + __clz_a);  \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1644
#if !defined (count_trailing_zeros)
/* count_trailing_zeros (count, x), built on count_leading_zeros (which
   may be an asm version or the C one).  X & -X is handed to
   count_leading_zeros, and COUNT is W_TYPE_SIZE - 1 minus the number
   it reports.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_in = (x);                                              \
    UWtype __ctz_low = __ctz_in & -__ctz_in;                            \
    UWtype __ctz_lz;                                                    \
                                                                        \
    count_leading_zeros (__ctz_lz, __ctz_low);                          \
    (count) = (W_TYPE_SIZE - 1) - __ctz_lz;                             \
  } while (0)
#endif
1656
/* Any earlier definition of UDIV_NEEDS_NORMALIZATION wins; otherwise
   default it to 0.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif