libgcc/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
   36    UDWtype -- An unsigned type, at least twice as large as UWtype
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
   46 #define __BITS4 (W_TYPE_SIZE / 4) /* A quarter of the word width, in bits.  */
      /* __ll_B is 2 to the power of half the word width, i.e. the base in
         which a UWtype value splits into two half-word digits.  */
   47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
      /* Low half-word of T: T converted to UWtype, masked with __ll_B - 1.  */
   48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
      /* High half-word of T: T converted to UWtype, shifted down by half the
         word width.  */
   49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  50
   51 #ifndef W_TYPE_SIZE
      /* Fallback configuration used when the includer did not define
         W_TYPE_SIZE: 32-bit words, a half-word type that is simply the word
         type, and UDItype as the double-word type.  These defaults follow
         macros that already mention W_TYPE_SIZE and UWtype; that is harmless
         because macro bodies are only expanded where they are used.  */
   52 #define W_TYPE_SIZE     32
   53 #define UWtype          USItype
   54 #define UHWtype         USItype
   55 #define UDWtype         UDItype
   56 #endif
  57
  58 /* Used in glibc only.  */
   59 #ifndef attribute_hidden
      /* If the includer supplied no definition, attribute_hidden expands to
         nothing, so the declaration below stays valid.  */
   60 #define attribute_hidden
   61 #endif
   62
      /* 256-entry byte lookup table, declared here and defined elsewhere.
         The Alpha count_leading_zeros fallback in this file indexes it with
         a single byte; presumably each entry is the bit length of its index
         -- TODO confirm against the table's definition.  */
   63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If, in addition, the most
  79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  80    UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
  94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
  96    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  98    (i.e. carry out) is not stored anywhere, and is lost.
  99
 100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
  102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
  103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 104    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
 118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 119    understood by gcc1.  Use cpp to avoid major code duplication.  */
  120 #if __GNUC__ < 2
      /* Both macros expand to nothing here, so no "cc" clobber is emitted.  */
  121 #define __CLOBBER_CC
  122 #define __AND_CLOBBER_CC
  123 #else /* __GNUC__ >= 2 */
      /* __CLOBBER_CC opens a clobber list of its own (": "cc""), for asm
         statements with no other clobbers.  __AND_CLOBBER_CC appends
         ", "cc"" to a clobber list that the asm statement already has.  */
  124 #define __CLOBBER_CC : "cc"
  125 #define __AND_CLOBBER_CC , "cc"
  126 #endif /* __GNUC__ < 2 */
 127
  128 #if defined (__alpha) && W_TYPE_SIZE == 64
      /* Alpha with 64-bit words.  Nothing in this section uses inline asm:
         the work is done by compiler builtins and, for division, by an
         external helper function.  */
      /* umul_ppmm: PH:PL = M0 * M1.  Both operands are copied into locals
         first, so each of them is evaluated exactly once.  The high word
         comes from __builtin_alpha_umulh, the low word from an ordinary
         UDItype multiplication.  */
  129 #define umul_ppmm(ph, pl, m0, m1) \
  130   do {                                                                  \
  131     UDItype __m0 = (m0), __m1 = (m1);                                   \
  132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
  133     (pl) = __m0 * __m1;                                                 \
  134   } while (0)
  135 #define UMUL_TIME 46
  136 #ifndef LONGLONG_STANDALONE
      /* udiv_qrnnd: only available when LONGLONG_STANDALONE is not defined.
         The division is delegated to the external function __udiv_qrnnd,
         which returns the quotient and stores the remainder through its
         first (pointer) argument.  */
  137 #define udiv_qrnnd(q, r, n1, n0, d) \
  138   do { UDItype __r;                                                     \
  139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
  140     (r) = __r;                                                          \
  141   } while (0)
  142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
  143 #define UDIV_TIME 220
  144 #endif /* LONGLONG_STANDALONE */
  145 #ifdef __alpha_cix__
      /* With __alpha_cix__ defined, both bit counts map directly onto the
         long builtins, and COUNT_LEADING_ZEROS_0 is set to 64 (see the
         description of count_leading_zeros near the top of this file).  */
  146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
  147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
  148 #define COUNT_LEADING_ZEROS_0 64
  149 #else
      /* Fallback count_leading_zeros.  __builtin_alpha_cmpbge (0, X) yields
         a per-byte mask __t.  __clz_tab[__t ^ 0xff] - 1 becomes the index
         __a of the byte that __builtin_alpha_extbl extracts, and a second
         __clz_tab lookup on that byte completes the result as
         64 - (table value + 8 * __a).  */
  150 #define count_leading_zeros(COUNT,X) \
  151   do {                                                                  \
  152     UDItype __xr = (X), __t, __a;                                       \
  153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
  155     __t = __builtin_alpha_extbl (__xr, __a);                            \
  156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  157   } while (0)
      /* Fallback count_trailing_zeros.  ~__t & -~__t isolates the lowest set
         bit of the inverted cmpbge mask.  The three tests against 0xCC,
         0xF0 and 0xAA turn that one-hot value into an index (adding 2, 4
         and 1 respectively), which selects the byte to extract.  The same
         one-hot-to-index step is then repeated on the lowest set bit of
         that byte, so COUNT = 8 * byte index + bit index.  */
  158 #define count_trailing_zeros(COUNT,X) \
  159   do {                                                                  \
  160     UDItype __xr = (X), __t, __a;                                       \
  161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  162     __t = ~__t & -~__t;                                                 \
  163     __a = ((__t & 0xCC) != 0) * 2;                                      \
  164     __a += ((__t & 0xF0) != 0) * 4;                                     \
  165     __a += ((__t & 0xAA) != 0);                                         \
  166     __t = __builtin_alpha_extbl (__xr, __a);                            \
  167     __a <<= 3;                                                          \
  168     __t &= -__t;                                                        \
  169     __a += ((__t & 0xCC) != 0) * 2;                                     \
  170     __a += ((__t & 0xF0) != 0) * 4;                                     \
  171     __a += ((__t & 0xAA) != 0);                                         \
  172     (COUNT) = __a;                                                      \
  173   } while (0)
  174 #endif /* __alpha_cix__ */
  175 #endif /* __alpha */
 176
 177 #if defined (__arc__) && W_TYPE_SIZE == 32
 178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 180            : "=r" ((USItype) (sh)),                                     \
 181              "=&r" ((USItype) (sl))                                     \
 182            : "%r" ((USItype) (ah)),                                     \
 183              "rIJ" ((USItype) (bh)),                                    \
 184              "%r" ((USItype) (al)),                                     \
 185              "rIJ" ((USItype) (bl)))
 186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 188            : "=r" ((USItype) (sh)),                                     \
 189              "=&r" ((USItype) (sl))                                     \
 190            : "r" ((USItype) (ah)),                                      \
 191              "rIJ" ((USItype) (bh)),                                    \
 192              "r" ((USItype) (al)),                                      \
 193              "rIJ" ((USItype) (bl)))
 194 /* Call libgcc routine.  */
 195 #define umul_ppmm(w1, w0, u, v) \
 196 do {                                                                    \
 197   DWunion __w;                                                          \
 198   __w.ll = __umulsidi3 (u, v);                                          \
 199   w1 = __w.s.high;                                                      \
 200   w0 = __w.s.low;                                                       \
 201 } while (0)
 202 #define __umulsidi3 __umulsidi3
 203 UDItype __umulsidi3 (USItype, USItype);
 204 #endif
 205
 206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
 207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 209            : "=r" ((USItype) (sh)),                                     \
 210              "=&r" ((USItype) (sl))                                     \
 211            : "%r" ((USItype) (ah)),                                     \
 212              "rI" ((USItype) (bh)),                                     \
 213              "%r" ((USItype) (al)),                                     \
 214              "rI" ((USItype) (bl)) __CLOBBER_CC)
 215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 217            : "=r" ((USItype) (sh)),                                     \
 218              "=&r" ((USItype) (sl))                                     \
 219            : "r" ((USItype) (ah)),                                      \
 220              "rI" ((USItype) (bh)),                                     \
 221              "r" ((USItype) (al)),                                      \
 222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 #define umul_ppmm(xh, xl, a, b) \
 224 {register USItype __t0, __t1, __t2;                                     \
 225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 226            "    mov     %2, %5, lsr #16\n"                              \
 227            "    mov     %0, %6, lsr #16\n"                              \
 228            "    bic     %3, %5, %2, lsl #16\n"                          \
 229            "    bic     %4, %6, %0, lsl #16\n"                          \
 230            "    mul     %1, %3, %4\n"                                   \
 231            "    mul     %4, %2, %4\n"                                   \
 232            "    mul     %3, %0, %3\n"                                   \
 233            "    mul     %0, %2, %0\n"                                   \
 234            "    adds    %3, %4, %3\n"                                   \
 235            "    addcs   %0, %0, #65536\n"                               \
 236            "    adds    %1, %1, %3, lsl #16\n"                          \
 237            "    adc     %0, %0, %3, lsr #16"                            \
 238            : "=&r" ((USItype) (xh)),                                    \
 239              "=r" ((USItype) (xl)),                                     \
 240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 241            : "r" ((USItype) (a)),                                       \
 242              "r" ((USItype) (b)) __CLOBBER_CC );}
 243 #define UMUL_TIME 20
 244 #define UDIV_TIME 100
 245 #endif /* __arm__ */
 246
  247 #if defined(__arm__)
      /* Unlike the section above, this guard tests neither __thumb__ nor
         W_TYPE_SIZE, so the definitions apply to every __arm__ build.
         COUNT_LEADING_ZEROS_0 is set to 32 (see the description of
         count_leading_zeros near the top of this file).  */
  248 /* Let gcc decide how best to implement count_leading_zeros.  */
  249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
  250 #define COUNT_LEADING_ZEROS_0 32
  251 #endif
 252
  253 #if defined (__AVR__)
      /* AVR: both bit counts map onto compiler builtins.  The builtin
         variant is chosen by word size: the plain (int) builtins for 16-bit
         words, the "l" (long) ones for 32-bit words and the "ll" (long long)
         ones for 64-bit words -- presumably matching the widths of those
         types on this target.  In each case COUNT_LEADING_ZEROS_0 equals the
         word size.  */
  254
  255 #if W_TYPE_SIZE == 16
  256 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
  257 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
  258 #define COUNT_LEADING_ZEROS_0 16
  259 #endif /* W_TYPE_SIZE == 16 */
  260
  261 #if W_TYPE_SIZE == 32
  262 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
  263 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
  264 #define COUNT_LEADING_ZEROS_0 32
  265 #endif /* W_TYPE_SIZE == 32 */
  266
  267 #if W_TYPE_SIZE == 64
  268 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
  269 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
  270 #define COUNT_LEADING_ZEROS_0 64
  271 #endif /* W_TYPE_SIZE == 64 */
  272
  273 #endif /* defined (__AVR__) */
 274
  275 #if defined (__CRIS__) && __CRIS_arch_version >= 3
      /* CRIS: count_leading_zeros uses the builtin from architecture
         version 3 on; count_trailing_zeros is only defined from version 8
         on.  No COUNT_LEADING_ZEROS_0 is defined here, so per the header
         comment the result for X == 0 is undefined.  */
  276 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
  277 #if __CRIS_arch_version >= 8
  278 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
  279 #endif
  280 #endif /* __CRIS__ */
 281
  282 #if defined (__hppa) && W_TYPE_SIZE == 32
      /* HP PA-RISC, 32-bit words.
         add_ssaaaa / sub_ddmmss: two-word add and subtract.  Operands are
         %0 = sh, %1 = sl, %2 = ah, %3 = bh, %4 = al, %5 = bl; the templates
         name the destination register last.  sl is earlyclobber ("=&r")
         because it is written before ah and bh are read.  */
  283 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  284   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
  285            : "=r" ((USItype) (sh)),                                     \
  286              "=&r" ((USItype) (sl))                                     \
  287            : "%rM" ((USItype) (ah)),                                    \
  288              "rM" ((USItype) (bh)),                                     \
  289              "%rM" ((USItype) (al)),                                    \
  290              "rM" ((USItype) (bl)))
  291 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  292   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
  293            : "=r" ((USItype) (sh)),                                     \
  294              "=&r" ((USItype) (sl))                                     \
  295            : "rM" ((USItype) (ah)),                                     \
  296              "rM" ((USItype) (bh)),                                     \
  297              "rM" ((USItype) (al)),                                     \
  298              "rM" ((USItype) (bl)))
  299 #if defined (_PA_RISC1_1)
      /* umul_ppmm (only with _PA_RISC1_1): a single xmpyu whose operands
         all use the "x" constraint.  The UDItype result is read back through
         a union; __w1, the first member of the struct, supplies the high
         word W1 and __w0 the low word W0.  */
  300 #define umul_ppmm(w1, w0, u, v) \
  301   do {                                                                  \
  302     union                                                               \
  303       {                                                                 \
  304         UDItype __f;                                                    \
  305         struct {USItype __w1, __w0;} __w1w0;                            \
  306       } __t;                                                            \
  307     __asm__ ("xmpyu %1,%2,%0"                                           \
  308              : "=x" (__t.__f)                                           \
  309              : "x" ((USItype) (u)),                                     \
  310                "x" ((USItype) (v)));                                    \
  311     (w1) = __t.__w1w0.__w1;                                             \
  312     (w0) = __t.__w1w0.__w0;                                             \
  313      } while (0)
  314 #define UMUL_TIME 8
  315 #else
  316 #define UMUL_TIME 30
  317 #endif
  318 #define UDIV_TIME 40
      /* count_leading_zeros: a binary search written in asm.  %0 (count)
         starts at 1.  The template then tests successively narrower upper
         fields of the working value (16, 8, 4 and 2 bits wide): when a field
         is zero its width is added to the count, otherwise the value is
         shifted down.  Finally the remaining bit 1 is subtracted.  X itself
         is not modified: it is tied (constraint "1") to the scratch output
         __tmp, which is the register the template shifts.  */
  319 #define count_leading_zeros(count, x) \
  320   do {                                                                  \
  321     USItype __tmp;                                                      \
  322     __asm__ (                                                           \
  323        "ldi             1,%0\n"                                         \
  324 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
  325 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
  326 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
  327 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
  328 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
  329 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
  330 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
  331 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
  332 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
  333 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
  334 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
  335 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
  336 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
  337 "       sub             %0,%1,%0                ; Subtract it.\n"       \
  338         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  339   } while (0)
  340 #endif
 341
  342 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
      /* i370 / S/390 / MVS, 32-bit words.  Only the signed primitives
         smul_ppmm and sdiv_qrnnd are provided here, in two variants selected
         by __zarch__.  */
  343 #if !defined (__zarch__)
      /* Without __zarch__: the 64-bit value is a DItype overlaid by a union
         with two 32-bit halves, __h first and __l second.  smul_ppmm runs
         "lr" then "mr" on that operand (%N0 presumably names the second
         register of the pair holding %0 -- TODO confirm) and returns __h in
         XH and __l in XL.  */
  344 #define smul_ppmm(xh, xl, m0, m1) \
  345   do {                                                                  \
  346     union {DItype __ll;                                                 \
  347            struct {USItype __h, __l;} __i;                              \
  348           } __x;                                                        \
  349     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
  350              : "=&r" (__x.__ll)                                         \
  351              : "r" (m0), "r" (m1));                                     \
  352     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
  353   } while (0)
      /* sdiv_qrnnd: N1 and N0 are stored into the union halves, "dr"
         divides the resulting 64-bit operand by D in place, and afterwards
         the quotient is read from __l and the remainder from __h.  */
  354 #define sdiv_qrnnd(q, r, n1, n0, d) \
  355   do {                                                                  \
  356     union {DItype __ll;                                                 \
  357            struct {USItype __h, __l;} __i;                              \
  358           } __x;                                                        \
  359     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
  360     __asm__ ("dr %0,%2"                                                 \
  361              : "=r" (__x.__ll)                                          \
  362              : "0" (__x.__ll), "r" (d));                                \
  363     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
  364   } while (0)
  365 #else
      /* With __zarch__: the operands are pinned to registers 0 and 1 through
         register asm variables, and the templates name %r0 explicitly
         instead of using an operand number.  smul_ppmm starts with M0 in
         register 1 and returns register 0 in XH and register 1 in XL.  */
  366 #define smul_ppmm(xh, xl, m0, m1) \
  367   do {                                                                  \
  368     register SItype __r0 __asm__ ("0");                                 \
  369     register SItype __r1 __asm__ ("1") = (m0);                          \
  370                                                                         \
  371     __asm__ ("mr\t%%r0,%3"                                              \
  372              : "=r" (__r0), "=r" (__r1)                                 \
  373              : "r"  (__r1),  "r" (m1));                                 \
  374     (xh) = __r0; (xl) = __r1;                                           \
  375   } while (0)
  376
      /* sdiv_qrnnd: N1 goes into register 0 and N0 into register 1; after
         "dr" the quotient is taken from register 1 and the remainder from
         register 0.  */
  377 #define sdiv_qrnnd(q, r, n1, n0, d) \
  378   do {                                                                  \
  379     register SItype __r0 __asm__ ("0") = (n1);                          \
  380     register SItype __r1 __asm__ ("1") = (n0);                          \
  381                                                                         \
  382     __asm__ ("dr\t%%r0,%4"                                              \
  383              : "=r" (__r0), "=r" (__r1)                                 \
  384              : "r" (__r0), "r" (__r1), "r" (d));                        \
  385     (q) = __r1; (r) = __r0;                                             \
  386   } while (0)
  387 #endif /* __zarch__ */
  388 #endif
 389
  390 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
      /* 80x86, 32-bit words.  The templates use GCC's {att|intel} dialect
         alternatives, so each instruction is spelled for both assembler
         syntaxes.
         add_ssaaaa / sub_ddmmss: operands are %0 = sh, %1 = sl, %3 = bh,
         %5 = bl; ah and al are tied to the outputs through the matching
         constraints "0" and "1", so the instructions update them in place.
         sl is earlyclobber ("=&r") because it is written before bh is
         read.  */
  391 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  392   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
  393            : "=r" ((USItype) (sh)),                                     \
  394              "=&r" ((USItype) (sl))                                     \
  395            : "%0" ((USItype) (ah)),                                     \
  396              "g" ((USItype) (bh)),                                      \
  397              "%1" ((USItype) (al)),                                     \
  398              "g" ((USItype) (bl)))
  399 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  400   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
  401            : "=r" ((USItype) (sh)),                                     \
  402              "=&r" ((USItype) (sl))                                     \
  403            : "0" ((USItype) (ah)),                                      \
  404              "g" ((USItype) (bh)),                                      \
  405              "1" ((USItype) (al)),                                      \
  406              "g" ((USItype) (bl)))
      /* umul_ppmm: a single "mul".  U is tied to operand 0, the "a"
         register, which receives the low word W0; the high word W1 comes
         back in the "d" register.  V may be in a register or in memory.  */
  407 #define umul_ppmm(w1, w0, u, v) \
  408   __asm__ ("mul{l} %3"                                                  \
  409            : "=a" ((USItype) (w0)),                                     \
  410              "=d" ((USItype) (w1))                                      \
  411            : "%0" ((USItype) (u)),                                      \
  412              "rm" ((USItype) (v)))
      /* udiv_qrnnd: a single "div".  N0 is tied to the "a" register and N1
         to the "d" register; the quotient Q comes back in "a" and the
         remainder R in "d".  */
  413 #define udiv_qrnnd(q, r, n1, n0, dv) \
  414   __asm__ ("div{l} %4"                                                  \
  415            : "=a" ((USItype) (q)),                                      \
  416              "=d" ((USItype) (r))                                       \
  417            : "0" ((USItype) (n0)),                                      \
  418              "1" ((USItype) (n1)),                                      \
  419              "rm" ((USItype) (dv)))
      /* No COUNT_LEADING_ZEROS_0 is defined in this section, so per the
         header comment count_leading_zeros is undefined for X == 0.  */
  420 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
  421 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
  422 #define UMUL_TIME 40
  423 #define UDIV_TIME 40
  424 #endif /* 80x86 */
 425
  426 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
      /* x86 with 64-bit words: the same templates as the 32-bit 80x86
         section, using UDItype operands and q-suffixed instructions.
         NOTE(review): the guard also admits __i386__ with W_TYPE_SIZE == 64
         although every template here uses 64-bit instructions -- confirm
         that this combination is intended.
         add_ssaaaa / sub_ddmmss: operands are %0 = sh, %1 = sl, %3 = bh,
         %5 = bl; ah and al are tied to the outputs through the matching
         constraints "0" and "1".  sl is earlyclobber ("=&r") because it is
         written before bh is read.  */
  427 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  428   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
  429            : "=r" ((UDItype) (sh)),                                     \
  430              "=&r" ((UDItype) (sl))                                     \
  431            : "%0" ((UDItype) (ah)),                                     \
  432              "rme" ((UDItype) (bh)),                                    \
  433              "%1" ((UDItype) (al)),                                     \
  434              "rme" ((UDItype) (bl)))
  435 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  436   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
  437            : "=r" ((UDItype) (sh)),                                     \
  438              "=&r" ((UDItype) (sl))                                     \
  439            : "0" ((UDItype) (ah)),                                      \
  440              "rme" ((UDItype) (bh)),                                    \
  441              "1" ((UDItype) (al)),                                      \
  442              "rme" ((UDItype) (bl)))
      /* umul_ppmm: a single "mul".  U is tied to operand 0, the "a"
         register, which receives the low word W0; the high word W1 comes
         back in the "d" register.  */
  443 #define umul_ppmm(w1, w0, u, v) \
  444   __asm__ ("mul{q} %3"                                                  \
  445            : "=a" ((UDItype) (w0)),                                     \
  446              "=d" ((UDItype) (w1))                                      \
  447            : "%0" ((UDItype) (u)),                                      \
  448              "rm" ((UDItype) (v)))
      /* udiv_qrnnd: a single "div".  N0 is tied to the "a" register and N1
         to the "d" register; the quotient Q comes back in "a" and the
         remainder R in "d".  */
  449 #define udiv_qrnnd(q, r, n1, n0, dv) \
  450   __asm__ ("div{q} %4"                                                  \
  451            : "=a" ((UDItype) (q)),                                      \
  452              "=d" ((UDItype) (r))                                       \
  453            : "0" ((UDItype) (n0)),                                      \
  454              "1" ((UDItype) (n1)),                                      \
  455              "rm" ((UDItype) (dv)))
      /* The long long builtins are used for the 64-bit word.  No
         COUNT_LEADING_ZEROS_0 is defined in this section.  */
  456 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
  457 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
  458 #define UMUL_TIME 40
  459 #define UDIV_TIME 40
  460 #endif /* x86_64 */
 461
  462 #if defined (__i960__) && W_TYPE_SIZE == 32
      /* Intel i960, 32-bit words.  Both macros are GNU statement
         expressions built around one "emul" instruction that produces a
         UDItype result.
         umul_ppmm reads that result back through a union whose struct lists
         the low half __l first and the high half __h second, and stores __h
         in W1 and __l in W0.  */
  463 #define umul_ppmm(w1, w0, u, v) \
  464   ({union {UDItype __ll;                                                \
  465            struct {USItype __l, __h;} __i;                              \
  466           } __xx;                                                       \
  467   __asm__ ("emul        %2,%1,%0"                                       \
  468            : "=d" (__xx.__ll)                                           \
  469            : "%dI" ((USItype) (u)),                                     \
  470              "dI" ((USItype) (v)));                                     \
  471   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
      /* __umulsidi3 evaluates to the full UDItype product of U and V.  */
  472 #define __umulsidi3(u, v) \
  473   ({UDItype __w;                                                        \
  474     __asm__ ("emul      %2,%1,%0"                                       \
  475              : "=d" (__w)                                               \
  476              : "%dI" ((USItype) (u)),                                   \
  477                "dI" ((USItype) (v)));                                   \
  478     __w; })
  479 #endif /* __i960__ */
 480
  481 #if defined (__ia64) && W_TYPE_SIZE == 64
      /* IA-64, 64-bit words.  sub_ddmmss below is plain C.  Note that it
         evaluates AL and BL twice and AH and BH along whichever branch is
         taken, so arguments with side effects must be avoided.  */
  482 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
  483    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
  484    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
  485    register, which takes an extra cycle.  */
  486 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  487   do {                                                                  \
  488     UWtype __x;                                                         \
  489     __x = (al) - (bl);                                                  \
  490     if ((al) < (bl))                                                    \
  491       (sh) = (ah) - (bh) - 1;                                           \
  492     else                                                                \
  493       (sh) = (ah) - (bh);                                               \
  494     (sl) = __x;                                                         \
  495   } while (0)
  496
  497 /* Do both product parts in assembly, since that gives better code with
  498    all gcc versions.  Some callers will just use the upper part, and in
  499    that situation we waste an instruction, but not any cycles.  */
      /* All four operands use the "f" constraint: xma.hu writes the high
         word PH and xma.l the low word PL.  PH is earlyclobber ("=&f")
         because the second instruction still reads M0 and M1.  */
  500 #define umul_ppmm(ph, pl, m0, m1)                                       \
  501   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
  502            : "=&f" (ph), "=f" (pl)                                      \
  503            : "f" (m0), "f" (m1))
      /* count_leading_zeros: _y is X after "mux1 ... @rev", and _a is the
         result of "czx1.l" applied to (-_y | _y).  (_a - 1) * 8 is used as
         a first shift amount _c; the remaining value is then narrowed by 4,
         2 and 1 bits, adding each step to _c.  The result is
         W_TYPE_SIZE - 1 - _c, so _c ends up as the bit index of the most
         significant set bit.  */
  504 #define count_leading_zeros(count, x)                                   \
  505   do {                                                                  \
  506     UWtype _x = (x), _y, _a, _c;                                        \
  507     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
  508     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
  509     _c = (_a - 1) << 3;                                                 \
  510     _x >>= _c;                                                          \
  511     if (_x >= 1 << 4)                                                   \
  512       _x >>= 4, _c += 4;                                                \
  513     if (_x >= 1 << 2)                                                   \
  514       _x >>= 2, _c += 2;                                                \
  515     _c += _x >> 1;                                                      \
  516     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  517   } while (0)
  518 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
  519    based, and we don't need a special case for x==0 here */
      /* (x - 1) & ~x keeps exactly the bits below the lowest set bit of x,
         so its population count is the number of trailing zeros.  */
  520 #define count_trailing_zeros(count, x)                                  \
  521   do {                                                                  \
  522     UWtype __ctz_x = (x);                                               \
  523     __asm__ ("popcnt %0 = %1"                                           \
  524              : "=r" (count)                                             \
  525              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  526   } while (0)
  527 #define UMUL_TIME 14
  528 #endif
 529
  530 #if defined (__M32R__) && W_TYPE_SIZE == 32
      /* M32R, 32-bit words.
         add_ssaaaa / sub_ddmmss: operands are %0 = sh, %1 = sl, %3 = bh,
         %5 = bl; ah and al are tied to the outputs through the matching
         constraints "0" and "1", so addx / subx update them in place.  The
         low words are processed first, then the high words.  Both asm
         statements list "cbit" as clobbered.  */
  531 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  532   /* The cmp clears the condition bit.  */ \
  533   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
  534            : "=r" ((USItype) (sh)),                                     \
  535              "=&r" ((USItype) (sl))                                     \
  536            : "0" ((USItype) (ah)),                                      \
  537              "r" ((USItype) (bh)),                                      \
  538              "1" ((USItype) (al)),                                      \
  539              "r" ((USItype) (bl))                                       \
  540            : "cbit")
  541 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  542   /* The cmp clears the condition bit.  */ \
  543   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
  544            : "=r" ((USItype) (sh)),                                     \
  545              "=&r" ((USItype) (sl))                                     \
  546            : "0" ((USItype) (ah)),                                      \
  547              "r" ((USItype) (bh)),                                      \
  548              "1" ((USItype) (al)),                                      \
  549              "r" ((USItype) (bl))                                       \
  550            : "cbit")
  551 #endif /* __M32R__ */
 552
 553 #if defined (__mc68000__) && W_TYPE_SIZE == 32
     /* Motorola 68000 family and ColdFire with 32-bit words.  This section
        always defines add_ssaaaa and sub_ddmmss, picks one of three
        umul_ppmm implementations, and defines count_leading_zeros only
        where a suitable instruction or builtin is available.  */
     /* (SH,SL) = (AH,AL) + (BH,BL): add the low words, then addx the high
        words (presumably add-with-extend, which picks up the carry).  The
        tied inputs are marked commutative with '%'.  SL is early-clobbered
        because it is written before BH is read.  */
 554 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 555   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 556            : "=d" ((USItype) (sh)),                                     \
 557              "=&d" ((USItype) (sl))                                     \
 558            : "%0" ((USItype) (ah)),                                     \
 559              "d" ((USItype) (bh)),                                      \
 560              "%1" ((USItype) (al)),                                     \
 561              "g" ((USItype) (bl)))
     /* (SH,SL) = (AH,AL) - (BH,BL): sub on the low words, then subx on the
        high words.  Unlike add_ssaaaa the tied inputs are not marked
        commutative.  */
 562 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 563   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 564            : "=d" ((USItype) (sh)),                                     \
 565              "=&d" ((USItype) (sl))                                     \
 566            : "0" ((USItype) (ah)),                                      \
 567              "d" ((USItype) (bh)),                                      \
 568              "1" ((USItype) (al)),                                      \
 569              "g" ((USItype) (bl)))
 570
 571 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
     /* NOTE(review): the condition below excludes __mc68060__ although the
        comment above lists the '060 -- confirm which one is intended.  */
 572 #if (defined (__mc68020__) && !defined (__mc68060__))
     /* (W1,W0) = U * V, unsigned, in a single mulu.l.  Operand 0 carries U
        on input and W0 on output.  */
 573 #define umul_ppmm(w1, w0, u, v) \
 574   __asm__ ("mulu%.l %3,%1:%0"                                           \
 575            : "=d" ((USItype) (w0)),                                     \
 576              "=d" ((USItype) (w1))                                      \
 577            : "%0" ((USItype) (u)),                                      \
 578              "dmi" ((USItype) (v)))
 579 #define UMUL_TIME 45
     /* Unsigned division of the double word (N1,N0) by D with one divu.l.
        Operand 0 carries N0 in and Q out; operand 1 carries N1 in and R
        out.  */
 580 #define udiv_qrnnd(q, r, n1, n0, d) \
 581   __asm__ ("divu%.l %4,%1:%0"                                           \
 582            : "=d" ((USItype) (q)),                                      \
 583              "=d" ((USItype) (r))                                       \
 584            : "0" ((USItype) (n0)),                                      \
 585              "1" ((USItype) (n1)),                                      \
 586              "dmi" ((USItype) (d)))
 587 #define UDIV_TIME 90
     /* Signed counterpart of udiv_qrnnd: identical operands, divs.l
        instead of divu.l.  */
 588 #define sdiv_qrnnd(q, r, n1, n0, d) \
 589   __asm__ ("divs%.l %4,%1:%0"                                           \
 590            : "=d" ((USItype) (q)),                                      \
 591              "=d" ((USItype) (r))                                       \
 592            : "0" ((USItype) (n0)),                                      \
 593              "1" ((USItype) (n1)),                                      \
 594              "dmi" ((USItype) (d)))
 595
 596 #elif defined (__mcoldfire__) /* not mc68020 */
 597
     /* (XH,XL) = A * B, unsigned, assembled from four 16x16 mulu partial
        products.  A and B are copied into d0/d1, split into halves with
        swap, and the cross products are summed with a carry fix-up (the
        jcc/add #65536 pair).  d0-d4 are used as scratch and are all listed
        as clobbers.  */
 598 #define umul_ppmm(xh, xl, a, b) \
 599   __asm__ ("| Inlined umul_ppmm\n"                                      \
 600            "    move%.l %2,%/d0\n"                                      \
 601            "    move%.l %3,%/d1\n"                                      \
 602            "    move%.l %/d0,%/d2\n"                                    \
 603            "    swap    %/d0\n"                                         \
 604            "    move%.l %/d1,%/d3\n"                                    \
 605            "    swap    %/d1\n"                                         \
 606            "    move%.w %/d2,%/d4\n"                                    \
 607            "    mulu    %/d3,%/d4\n"                                    \
 608            "    mulu    %/d1,%/d2\n"                                    \
 609            "    mulu    %/d0,%/d3\n"                                    \
 610            "    mulu    %/d0,%/d1\n"                                    \
 611            "    move%.l %/d4,%/d0\n"                                    \
 612            "    clr%.w  %/d0\n"                                         \
 613            "    swap    %/d0\n"                                         \
 614            "    add%.l  %/d0,%/d2\n"                                    \
 615            "    add%.l  %/d3,%/d2\n"                                    \
 616            "    jcc     1f\n"                                           \
 617            "    add%.l  %#65536,%/d1\n"                                 \
 618            "1:  swap    %/d2\n"                                         \
 619            "    moveq   %#0,%/d0\n"                                     \
 620            "    move%.w %/d2,%/d0\n"                                    \
 621            "    move%.w %/d4,%/d2\n"                                    \
 622            "    move%.l %/d2,%1\n"                                      \
 623            "    add%.l  %/d1,%/d0\n"                                    \
 624            "    move%.l %/d0,%0"                                        \
 625            : "=g" ((USItype) (xh)),                                     \
 626              "=g" ((USItype) (xl))                                      \
 627            : "g" ((USItype) (a)),                                       \
 628              "g" ((USItype) (b))                                        \
 629            : "d0", "d1", "d2", "d3", "d4")
 630 #define UMUL_TIME 100
 631 #define UDIV_TIME 400
 632 #else /* not ColdFire */
 633 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
     /* (XH,XL) = A * B, unsigned.  Instruction for instruction the same
        sequence as the ColdFire variant above, except that the low half of
        d0 is cleared with eor.w instead of clr.w.  */
 634 #define umul_ppmm(xh, xl, a, b) \
 635   __asm__ ("| Inlined umul_ppmm\n"                                      \
 636            "    move%.l %2,%/d0\n"                                      \
 637            "    move%.l %3,%/d1\n"                                      \
 638            "    move%.l %/d0,%/d2\n"                                    \
 639            "    swap    %/d0\n"                                         \
 640            "    move%.l %/d1,%/d3\n"                                    \
 641            "    swap    %/d1\n"                                         \
 642            "    move%.w %/d2,%/d4\n"                                    \
 643            "    mulu    %/d3,%/d4\n"                                    \
 644            "    mulu    %/d1,%/d2\n"                                    \
 645            "    mulu    %/d0,%/d3\n"                                    \
 646            "    mulu    %/d0,%/d1\n"                                    \
 647            "    move%.l %/d4,%/d0\n"                                    \
 648            "    eor%.w  %/d0,%/d0\n"                                    \
 649            "    swap    %/d0\n"                                         \
 650            "    add%.l  %/d0,%/d2\n"                                    \
 651            "    add%.l  %/d3,%/d2\n"                                    \
 652            "    jcc     1f\n"                                           \
 653            "    add%.l  %#65536,%/d1\n"                                 \
 654            "1:  swap    %/d2\n"                                         \
 655            "    moveq   %#0,%/d0\n"                                     \
 656            "    move%.w %/d2,%/d0\n"                                    \
 657            "    move%.w %/d4,%/d2\n"                                    \
 658            "    move%.l %/d2,%1\n"                                      \
 659            "    add%.l  %/d1,%/d0\n"                                    \
 660            "    move%.l %/d0,%0"                                        \
 661            : "=g" ((USItype) (xh)),                                     \
 662              "=g" ((USItype) (xl))                                      \
 663            : "g" ((USItype) (a)),                                       \
 664              "g" ((USItype) (b))                                        \
 665            : "d0", "d1", "d2", "d3", "d4")
 666 #define UMUL_TIME 100
 667 #define UDIV_TIME 400
 668
 669 #endif /* not mc68020 */
 670
 671 /* The '020, '030, '040 and '060 have bitfield insns.
 672    cpu32 disguises as a 68020, but lacks them.  */
 673 #if defined (__mc68020__) && !defined (__mcpu32__)
     /* COUNT = leading-zero count of X via bfffo.  Operand 2 is the
        constant 0 and supplies both fields of the {offset:width}
        specifier.  */
 674 #define count_leading_zeros(count, x) \
 675   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 676            : "=d" ((USItype) (count))                                   \
 677            : "od" ((USItype) (x)), "n" (0))
 678 /* Some ColdFire architectures have a ff1 instruction supported via
 679    __builtin_clz. */
 680 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 681 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
     /* Presumably the value count_leading_zeros produces for X == 0 on
        these cores -- TODO confirm against the users of this macro.  */
 682 #define COUNT_LEADING_ZEROS_0 32
 683 #endif
 684 #endif /* mc68000 */
 685
 686 #if defined (__m88000__) && W_TYPE_SIZE == 32
     /* Motorola 88000 with 32-bit words.  */
     /* (SH,SL) = (AH,AL) + (BH,BL) using addu.co for the low words and
        addu.ci for the high words (presumably "carry out" and "carry in").
        Nothing is tied to an output: every input is a plain "rJ" operand
        printed with the %r modifier.  SL is early-clobbered because it is
        written before AH and BH are read.  */
 687 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 688   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 689            : "=r" ((USItype) (sh)),                                     \
 690              "=&r" ((USItype) (sl))                                     \
 691            : "%rJ" ((USItype) (ah)),                                    \
 692              "rJ" ((USItype) (bh)),                                     \
 693              "%rJ" ((USItype) (al)),                                    \
 694              "rJ" ((USItype) (bl)))
     /* (SH,SL) = (AH,AL) - (BH,BL): subu.co on the low words, subu.ci on
        the high words.  The inputs are not marked commutative here.  */
 695 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 696   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 697            : "=r" ((USItype) (sh)),                                     \
 698              "=&r" ((USItype) (sl))                                     \
 699            : "rJ" ((USItype) (ah)),                                     \
 700              "rJ" ((USItype) (bh)),                                     \
 701              "rJ" ((USItype) (al)),                                     \
 702              "rJ" ((USItype) (bl)))
     /* COUNT = (ff1 result) ^ 31.  ff1 presumably returns the index of the
        most significant set bit, so the XOR turns a bit index into a
        leading-zero count.  */
 703 #define count_leading_zeros(count, x) \
 704   do {                                                                  \
 705     USItype __cbtmp;                                                    \
 706     __asm__ ("ff1 %0,%1"                                                \
 707              : "=r" (__cbtmp)                                           \
 708              : "r" ((USItype) (x)));                                    \
 709     (count) = __cbtmp ^ 31;                                             \
 710   } while (0)
 711 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 712 #if defined (__mc88110__)
     /* (WH,WL) = U * V, unsigned.  mulu.d writes a 64-bit result into the
        union, which is then read back as two words; __h is declared first,
        so the high word is expected at the lower address.  */
 713 #define umul_ppmm(wh, wl, u, v) \
 714   do {                                                                  \
 715     union {UDItype __ll;                                                \
 716            struct {USItype __h, __l;} __i;                              \
 717           } __xx;                                                       \
 718     __asm__ ("mulu.d    %0,%1,%2"                                       \
 719              : "=r" (__xx.__ll)                                         \
 720              : "r" ((USItype) (u)),                                     \
 721                "r" ((USItype) (v)));                                    \
 722     (wh) = __xx.__i.__h;                                                \
 723     (wl) = __xx.__i.__l;                                                \
 724   } while (0)
     /* Q = (N1,N0) / D via divu.d; the remainder is not produced by the
        instruction but recomputed in C as N0 - Q * D.
        NOTE(review): N0 and D are each expanded twice, so arguments with
        side effects would be evaluated twice.  */
 725 #define udiv_qrnnd(q, r, n1, n0, d) \
 726   ({union {UDItype __ll;                                                \
 727            struct {USItype __h, __l;} __i;                              \
 728           } __xx;                                                       \
 729   USItype __q;                                                          \
 730   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 731   __asm__ ("divu.d %0,%1,%2"                                            \
 732            : "=r" (__q)                                                 \
 733            : "r" (__xx.__ll),                                           \
 734              "r" ((USItype) (d)));                                      \
 735   (r) = (n0) - __q * (d); (q) = __q; })
 736 #define UMUL_TIME 5
 737 #define UDIV_TIME 25
 738 #else
 739 #define UMUL_TIME 17
 740 #define UDIV_TIME 150
 741 #endif /* __mc88110__ */
 742 #endif /* __m88000__ */
 743
 744 #if defined (__mn10300__)
     /* MN10300 / AM33.  Every inline-assembly statement in this section is
        spelled __asm__, like the rest of this file, so that the header
        still compiles when the plain asm keyword is disabled (-ansi or a
        strict -std= mode).  Unlike most other sections, this one is not
        conditional on W_TYPE_SIZE.  */
 745 # if defined (__AM33__)
     /* AM33: leading-zero count through the compiler builtin, and
        four-operand multiplies that write both product words directly
        (W0 is operand 0, W1 is operand 1).  */
 746 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 747 #  define umul_ppmm(w1, w0, u, v)               \
 748     __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 749 #  define smul_ppmm(w1, w0, u, v)               \
 750     __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 751 # else
     /* Plain MN10300: two-operand multiplies.  W0 is tied to U (marked
        commutative with V) and W1 is returned through the machine-specific
        "z" register class.
        NOTE(review): the reason for the two leading nops is not visible
        from this file.  */
 752 #  define umul_ppmm(w1, w0, u, v)               \
 753     __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 754 #  define smul_ppmm(w1, w0, u, v)               \
 755     __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 756 # endif
     /* (SH,SL) = (AH,AL) + (BH,BL), written in plain C: both operands are
        packed into DWunion values, added as double words through .ll, and
        the sum is unpacked again.  */
 757 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
 758   do {                                          \
 759     DWunion __s, __a, __b;                      \
 760     __a.s.low = (al); __a.s.high = (ah);        \
 761     __b.s.low = (bl); __b.s.high = (bh);        \
 762     __s.ll = __a.ll + __b.ll;                   \
 763     (sl) = __s.s.low; (sh) = __s.s.high;        \
 764   } while (0)
     /* (SH,SL) = (AH,AL) - (BH,BL), same DWunion technique as above.  */
 765 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
 766   do {                                          \
 767     DWunion __s, __a, __b;                      \
 768     __a.s.low = (al); __a.s.high = (ah);        \
 769     __b.s.low = (bl); __b.s.high = (bh);        \
 770     __s.ll = __a.ll - __b.ll;                   \
 771     (sl) = __s.s.low; (sh) = __s.s.high;        \
 772   } while (0)
     /* Divide the double word (NH,NL) by D.  Operand 0 carries NL in and Q
        out; operand 1 (register class "z") carries NH in and R out.
        udiv_qrnnd uses divu, sdiv_qrnnd uses div.  */
 773 # define udiv_qrnnd(q, r, nh, nl, d)            \
 774   __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 775 # define sdiv_qrnnd(q, r, nh, nl, d)            \
 776   __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 777 # define UMUL_TIME 3
 778 # define UDIV_TIME 38
 779 #endif
 780
 781 #if defined (__mips__) && W_TYPE_SIZE == 32
     /* MIPS with 32-bit words.  */
     /* (W1,W0) = U * V, unsigned, with no inline assembly: both operands
        are narrowed to USItype, multiplied in UDItype, and the product is
        split into its high (>> 32) and low halves.  U and V are each
        evaluated once.  */
 782 #define umul_ppmm(w1, w0, u, v)                                         \
 783   do {                                                                  \
 784     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
 785     (w1) = (USItype) (__x >> 32);                                       \
 786     (w0) = (USItype) (__x);                                             \
 787   } while (0)
 788 #define UMUL_TIME 10
 789 #define UDIV_TIME 100
 790
     /* count_leading_zeros is only provided, via __builtin_clz, when
        __mips is 32 or 64 and the code is not MIPS16.  */
 791 #if (__mips == 32 || __mips == 64) && ! __mips16
 792 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 793 #define COUNT_LEADING_ZEROS_0 32
 794 #endif
 795 #endif /* __mips__ */
 796
 797 #if defined (__ns32000__) && W_TYPE_SIZE == 32
     /* NS32000 with 32-bit words.  Double-word values are passed to the
        instructions through a union whose struct view declares __l before
        __h, i.e. the low word is expected at the lower address.  */
     /* (W1,W0) = U * V, unsigned, with meid.  Operand 0 is the 64-bit
        union member on output and is tied to U on input.  */
 798 #define umul_ppmm(w1, w0, u, v) \
 799   ({union {UDItype __ll;                                                \
 800            struct {USItype __l, __h;} __i;                              \
 801           } __xx;                                                       \
 802   __asm__ ("meid %2,%0"                                                 \
 803            : "=g" (__xx.__ll)                                           \
 804            : "%0" ((USItype) (u)),                                      \
 805              "g" ((USItype) (v)));                                      \
 806   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
     /* Same meid multiply as umul_ppmm, but the statement expression
        yields the whole UDItype product instead of two words.  */
 807 #define __umulsidi3(u, v) \
 808   ({UDItype __w;                                                        \
 809     __asm__ ("meid %2,%0"                                               \
 810              : "=g" (__w)                                               \
 811              : "%0" ((USItype) (u)),                                    \
 812                "g" ((USItype) (v)));                                    \
 813     __w; })
     /* Divide the double word (N1,N0) by D with deid.  The dividend is
        loaded into the union, which deid updates in place; afterwards R
        is read from the low word and Q from the high word.  */
 814 #define udiv_qrnnd(q, r, n1, n0, d) \
 815   ({union {UDItype __ll;                                                \
 816            struct {USItype __l, __h;} __i;                              \
 817           } __xx;                                                       \
 818   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 819   __asm__ ("deid %2,%0"                                                 \
 820            : "=g" (__xx.__ll)                                           \
 821            : "0" (__xx.__ll),                                           \
 822              "g" ((USItype) (d)));                                      \
 823   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
     /* COUNT = trailing-zero count of X via ffsd.  The output register is
        tied to an input of constant 0, so it holds 0 when the instruction
        starts (presumably the bit offset at which the search begins).  */
 824 #define count_trailing_zeros(count,x) \
 825   do {                                                                  \
 826     __asm__ ("ffsd     %2,%0"                                           \
 827             : "=r" ((USItype) (count))                                  \
 828             : "0" ((USItype) 0),                                        \
 829               "r" ((USItype) (x)));                                     \
 830   } while (0)
 831 #endif /* __ns32000__ */
 832
 833 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 834    system vendor compilers.
 835    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 836    enough, since that hits ARM and m68k too.  */
 837 #if (defined (_ARCH_PPC)        /* AIX */                               \
 838      || defined (_ARCH_PWR)     /* AIX */                               \
 839      || defined (_ARCH_COM)     /* AIX */                               \
 840      || defined (__powerpc__)   /* gcc */                               \
 841      || defined (__POWERPC__)   /* BEOS */                              \
 842      || defined (__ppc__)       /* Darwin */                            \
 843      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 844      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 845          && CPU_FAMILY == PPC)                                                \
 846      ) && W_TYPE_SIZE == 32
     /* 32-bit POWER and PowerPC.  Each asm template offers two mnemonic
        spellings written as {first|second} alternatives.  */
     /* (SH,SL) = (AH,AL) + (BH,BL).  When BH is a compile-time constant 0
        or all ones, the high-word add takes no BH operand and uses addze
        or addme respectively; otherwise the general adde form is used.
        SL is early-clobbered in every variant because the low-word add
        writes it before the high-word inputs are read.  */
 847 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 848   do {                                                                  \
 849     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 850       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 851              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 852     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 853       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 854              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 855     else                                                                \
 856       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 857              : "=r" (sh), "=&r" (sl)                                    \
 858              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 859   } while (0)
     /* (SH,SL) = (AH,AL) - (BH,BL).  A compile-time constant AH or BH that
        is 0 or all ones selects a one-input high-word instruction (subfze,
        subfme, addme or addze); otherwise subfe handles the general
        case.  */
 860 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 861   do {                                                                  \
 862     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 863       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 864                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 865     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 866       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 867                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 868     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 869       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 870                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 871     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 872       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 873                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 874     else                                                                \
 875       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 876                : "=r" (sh), "=&r" (sl)                                  \
 877                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 878   } while (0)
     /* COUNT = leading-zero count of X, from a single cntlz/cntlzw.  */
 879 #define count_leading_zeros(count, x) \
 880   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 881 #define COUNT_LEADING_ZEROS_0 32
 882 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 883   || defined (__ppc__)                                                    \
 884   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 885   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 886          && CPU_FAMILY == PPC)
     /* (PH,PL) = M0 * M1, unsigned: mulhwu supplies the high word and an
        ordinary C multiply the low word.  The asm reads the local copies
        __m0/__m1 rather than the raw macro arguments, so each argument is
        evaluated exactly once and is converted to USItype before it
        reaches the instruction.  */
 887 #define umul_ppmm(ph, pl, m0, m1) \
 888   do {                                                                  \
 889     USItype __m0 = (m0), __m1 = (m1);                                   \
 890     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 891     (pl) = __m0 * __m1;                                                 \
 892   } while (0)
 893 #define UMUL_TIME 15
     /* Signed counterpart of umul_ppmm: mulhw on SItype copies of the
        arguments, again evaluated exactly once.  */
 894 #define smul_ppmm(ph, pl, m0, m1) \
 895   do {                                                                  \
 896     SItype __m0 = (m0), __m1 = (m1);                                    \
 897     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 898     (pl) = __m0 * __m1;                                                 \
 899   } while (0)
 900 #define SMUL_TIME 14
 901 #define UDIV_TIME 120
 902 #elif defined (_ARCH_PWR)
     /* POWER-only branch: no umul_ppmm is defined here.  The signed
        multiply and divide return their second result through the
        machine-specific "q" register class.  */
 903 #define UMUL_TIME 8
 904 #define smul_ppmm(xh, xl, m0, m1) \
 905   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 906 #define SMUL_TIME 4
 907 #define sdiv_qrnnd(q, r, nh, nl, d) \
 908   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 909 #define UDIV_TIME 100
 910 #endif
 911 #endif /* 32-bit POWER architecture variants.  */
 912
 913 /* We should test _IBMR2 here when we add assembly support for the system
 914    vendor compilers.  */
 915 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
     /* 64-bit PowerPC.  The add and subtract macros have the same shape as
        the 32-bit ones, with the all-ones constants written as
        ~(UDItype) 0.  */
     /* (SH,SL) = (AH,AL) + (BH,BL).  A compile-time constant BH of 0 or all
        ones selects addze or addme for the high word; otherwise adde is
        used.  SL is early-clobbered in every variant.  */
 916 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 917   do {                                                                  \
 918     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 919       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 920              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 921     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 922       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 923              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 924     else                                                                \
 925       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 926              : "=r" (sh), "=&r" (sl)                                    \
 927              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 928   } while (0)
     /* (SH,SL) = (AH,AL) - (BH,BL).  A compile-time constant AH or BH that
        is 0 or all ones selects subfze, subfme, addme or addze for the
        high word; otherwise subfe handles the general case.  */
 929 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 930   do {                                                                  \
 931     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 932       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 933                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 934     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 935       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 936                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 937     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 938       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 939                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 940     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 941       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 942                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 943     else                                                                \
 944       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 945                : "=r" (sh), "=&r" (sl)                                  \
 946                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 947   } while (0)
     /* COUNT = leading-zero count of the 64-bit X, from a single cntlzd.  */
 948 #define count_leading_zeros(count, x) \
 949   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 950 #define COUNT_LEADING_ZEROS_0 64
     /* (PH,PL) = M0 * M1, unsigned: mulhdu supplies the high double word
        and an ordinary C multiply the low one.  The asm reads the local
        copies __m0/__m1 rather than the raw macro arguments, so each
        argument is evaluated exactly once and is converted to UDItype
        before it reaches the instruction.  */
 951 #define umul_ppmm(ph, pl, m0, m1) \
 952   do {                                                                  \
 953     UDItype __m0 = (m0), __m1 = (m1);                                   \
 954     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 955     (pl) = __m0 * __m1;                                                 \
 956   } while (0)
 957 #define UMUL_TIME 15
     /* Signed counterpart of umul_ppmm: mulhd on DItype copies of the
        arguments, again evaluated exactly once.  */
 958 #define smul_ppmm(ph, pl, m0, m1) \
 959   do {                                                                  \
 960     DItype __m0 = (m0), __m1 = (m1);                                    \
 961     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 962     (pl) = __m0 * __m1;                                                 \
 963   } while (0)
 964 #define SMUL_TIME 14  /* ??? */
 965 #define UDIV_TIME 120 /* ??? */
 966 #endif /* 64-bit PowerPC.  */
 967
 968 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
     /* (SH,SL) = (AH,AL) + (BH,BL): "a" on the low words, then "ae" on the
        high words (presumably add-extended, which consumes the carry).
        SL is early-clobbered because it is written before BH is read.  */
 969 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 970   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 971            : "=r" ((USItype) (sh)),                                     \
 972              "=&r" ((USItype) (sl))                                     \
 973            : "%0" ((USItype) (ah)),                                     \
 974              "r" ((USItype) (bh)),                                      \
 975              "%1" ((USItype) (al)),                                     \
 976              "r" ((USItype) (bl)))
     /* (SH,SL) = (AH,AL) - (BH,BL): "s" on the low words, then "se" on the
        high words.  */
 977 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 978   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 979            : "=r" ((USItype) (sh)),                                     \
 980              "=&r" ((USItype) (sl))                                     \
 981            : "0" ((USItype) (ah)),                                      \
 982              "r" ((USItype) (bh)),                                      \
 983              "1" ((USItype) (al)),                                      \
 984              "r" ((USItype) (bl)))
     /* (PH,PL) = M0 * M1, unsigned.  The asm zeroes r2 (s r2,r2), moves M0
        into r10 with mts, issues sixteen "m r2,%3" steps, then copies r2
        to PH and reads PL back from r10 with mfs.  r2 is declared
        clobbered.  The C statement that follows adds M1 to PH when M0 has
        its top bit set and M0 when M1 has its top bit set -- the usual
        correction that turns a signed high word into the unsigned one
        (presumably the m steps perform a signed multiply).  */
 985 #define umul_ppmm(ph, pl, m0, m1) \
 986   do {                                                                  \
 987     USItype __m0 = (m0), __m1 = (m1);                                   \
 988     __asm__ (                                                           \
 989        "s       r2,r2\n"                                                \
 990 "       mts     r10,%2\n"                                               \
 991 "       m       r2,%3\n"                                                \
 992 "       m       r2,%3\n"                                                \
 993 "       m       r2,%3\n"                                                \
 994 "       m       r2,%3\n"                                                \
 995 "       m       r2,%3\n"                                                \
 996 "       m       r2,%3\n"                                                \
 997 "       m       r2,%3\n"                                                \
 998 "       m       r2,%3\n"                                                \
 999 "       m       r2,%3\n"                                                \
1000 "       m       r2,%3\n"                                                \
1001 "       m       r2,%3\n"                                                \
1002 "       m       r2,%3\n"                                                \
1003 "       m       r2,%3\n"                                                \
1004 "       m       r2,%3\n"                                                \
1005 "       m       r2,%3\n"                                                \
1006 "       m       r2,%3\n"                                                \
1007 "       cas     %0,r2,r0\n"                                             \
1008 "       mfs     r10,%1"                                                 \
1009              : "=r" ((USItype) (ph)),                                   \
1010                "=r" ((USItype) (pl))                                    \
1011              : "%r" (__m0),                                             \
1012                 "r" (__m1)                                              \
1013              : "r2");                                                   \
1014     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
1015              + (((SItype) __m1 >> 31) & __m0));                         \
1016   } while (0)
1017 #define UMUL_TIME 20
1018 #define UDIV_TIME 200
     /* COUNT = leading-zero count of X, built from clz applied to one
        16-bit half: if X >= 0x10000 the upper half (X >> 16) is counted,
        otherwise the low half is counted and 16 is added.  X is expanded
        in the test and again in the chosen branch, so it should have no
        side effects.  */
1019 #define count_leading_zeros(count, x) \
1020   do {                                                                  \
1021     if ((x) >= 0x10000)                                                 \
1022       __asm__ ("clz     %0,%1"                                          \
1023                : "=r" ((USItype) (count))                               \
1024                : "r" ((USItype) (x) >> 16));                            \
1025     else                                                                \
1026       {                                                                 \
1027         __asm__ ("clz   %0,%1"                                          \
1028                  : "=r" ((USItype) (count))                             \
1029                  : "r" ((USItype) (x)));                                        \
1030         (count) += 16;                                                  \
1031       }                                                                 \
1032   } while (0)
1033 #endif
1034
1035 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1036 #ifndef __sh1__
1037 #define umul_ppmm(w1, w0, u, v) \
1038   __asm__ (                                                             \
1039        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1040            : "=r<" ((USItype)(w1)),                                     \
1041              "=r<" ((USItype)(w0))                                      \
1042            : "r" ((USItype)(u)),                                        \
1043              "r" ((USItype)(v))                                         \
1044            : "macl", "mach")
1045 #define UMUL_TIME 5
1046 #endif
1047
1048 /* This is the same algorithm as __udiv_qrnnd_c.  */
1049 #define UDIV_NEEDS_NORMALIZATION 1
1050
/* SH udiv_qrnnd: loads D into r5 and a word-swapped copy of N0 into r4,
   builds r6 from D with swap.w/shll16, and calls the hidden helper
   __udiv_qrnnd_16 twice through "jsr".  N1 enters in, and R leaves in, the
   register selected by the "z" constraint (r0 according to the register
   notes inside the macro); Q is assembled from r1 with swap.w/or.
   r1, r2, r4, r5, r6, pr and t are declared clobbered.
   NOTE(review): the instruction following each "jsr" presumably runs in
   the branch delay slot -- confirm against the SH manual.  */
1051 #define udiv_qrnnd(q, r, n1, n0, d) \
1052   do {                                                                  \
1053     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1054                         __attribute__ ((visibility ("hidden")));        \
1055     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1056     __asm__ (                                                           \
1057         "mov%M4 %4,r5\n"                                                \
1058 "       swap.w %3,r4\n"                                                 \
1059 "       swap.w r5,r6\n"                                                 \
1060 "       jsr @%5\n"                                                      \
1061 "       shll16 r6\n"                                                    \
1062 "       swap.w r4,r4\n"                                                 \
1063 "       jsr @%5\n"                                                      \
1064 "       swap.w r1,%0\n"                                                 \
1065 "       or r1,%0"                                                       \
1066         : "=r" (q), "=&z" (r)                                           \
1067         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1068         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1069   } while (0)
1070
1071 #define UDIV_TIME 80
1072
/* SH sub_ddmmss: clear the T bit with "clrt", then "subc" BL from the
   register holding AL (result SL) and "subc" BH from the register holding
   AH (result SH).  AH and AL are tied to the output operands through the
   "0" and "1" constraints; T is declared clobbered.  */
1073 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1074   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1075            : "=r" (sh), "=r" (sl)                                       \
1076            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1077
1078 #endif /* __sh__ */
1079
1080 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Product of U and V, each converted to USItype, computed in UDItype.
   The arguments are parenthesized so that the casts apply to the whole
   argument expression rather than only to its first operand.  */
1081 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* SH5/SHmedia count_leading_zeros: zero-extend X (through USItype) into a
   UDItype, run "nsb" on it and store that result minus 31 in COUNT.
   NOTE(review): the meaning of the raw "nsb" result is not visible here;
   presumably it counts redundant sign bits of the 64-bit operand --
   confirm against the SHmedia manual.  */
1082 #define count_leading_zeros(count, x) \
1083   do                                                                    \
1084     {                                                                   \
1085       UDItype x_ = (USItype)(x);                                        \
1086       SItype c_;                                                        \
1087                                                                         \
1088       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1089       (count) = c_ - 31;                                                \
1090     }                                                                   \
1091   while (0)
1092 #define COUNT_LEADING_ZEROS_0 32
1093 #endif
1094
1095 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1096     && W_TYPE_SIZE == 32
/* 32-bit SPARC add_ssaaaa: "addcc" adds AL and BL into SL and sets the
   condition codes; "addx" then adds AH and BH into SH, presumably
   together with the carry left by the first add.  SL is early-clobber
   because it is written before AH and BH are read.  __CLOBBER_CC is
   defined outside this excerpt.  */
1097 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1098   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1099            : "=r" ((USItype) (sh)),                                     \
1100              "=&r" ((USItype) (sl))                                     \
1101            : "%rJ" ((USItype) (ah)),                                    \
1102              "rI" ((USItype) (bh)),                                     \
1103              "%rJ" ((USItype) (al)),                                    \
1104              "rI" ((USItype) (bl))                                      \
1105            __CLOBBER_CC)
/* 32-bit SPARC sub_ddmmss: "subcc" computes AL - BL into SL and sets the
   condition codes; "subx" then computes AH - BH into SH, presumably also
   subtracting the borrow left by the first instruction.  SL is
   early-clobber because it is written before AH and BH are read.  */
1106 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1107   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1108            : "=r" ((USItype) (sh)),                                     \
1109              "=&r" ((USItype) (sl))                                     \
1110            : "rJ" ((USItype) (ah)),                                     \
1111              "rI" ((USItype) (bh)),                                     \
1112              "rJ" ((USItype) (al)),                                     \
1113              "rI" ((USItype) (bl))                                      \
1114            __CLOBBER_CC)
1115 #if defined (__sparc_v8__)
/* SPARC v8 umul_ppmm: "umul" multiplies U by V into W0, then "rd %y"
   copies the Y register into W1.  Presumably Y holds the high word of the
   product at that point -- confirm against the SPARC v8 manual.  */
1116 #define umul_ppmm(w1, w0, u, v) \
1117   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1118            : "=r" ((USItype) (w1)),                                     \
1119              "=r" ((USItype) (w0))                                      \
1120            : "r" ((USItype) (u)),                                       \
1121              "r" ((USItype) (v)))
/* SPARC v8 udiv_qrnnd: move N1 into the Y register, wait three "nop"s,
   then "udiv" N0 by D into Q.  The remainder is rebuilt as
   R = N0 - Q * D using "umul" and "sub".  Both outputs are early-clobber
   because inputs are still read after they are written.  */
1122 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1123   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1124            : "=&r" ((USItype) (__q)),                                   \
1125              "=&r" ((USItype) (__r))                                    \
1126            : "r" ((USItype) (__n1)),                                    \
1127              "r" ((USItype) (__n0)),                                    \
1128              "r" ((USItype) (__d)))
1129 #else
1130 #if defined (__sparclite__)
1131 /* This has hardware multiply but not divide.  It also has two additional
1132    instructions scan (ffs from high bit) and divscc.  */
/* SPARClite umul_ppmm: "umul" multiplies U by V into W0, then "rd %y"
   copies the Y register into W1 (presumably the high word of the
   product -- confirm against the SPARClite manual).  */
1133 #define umul_ppmm(w1, w0, u, v) \
1134   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1135            : "=r" ((USItype) (w1)),                                     \
1136              "=r" ((USItype) (w0))                                      \
1137            : "r" ((USItype) (u)),                                       \
1138              "r" ((USItype) (v)))
/* SPARClite udiv_qrnnd: write N1 to the Y register, clear the condition
   codes with "tst %g0", then run 32 "divscc" steps that start from N0,
   use D as divisor and accumulate in %g1; the last step writes Q.  R is
   then read from Y, and the "add" following "bl,a" adds D to R
   (presumably only when the branch is taken, since the delay slot is
   annulled otherwise).
   Q and R are early-clobber ("=&r"): both are written before that final
   "add" reads D, so they must not share a register with D.  */
1139 #define udiv_qrnnd(q, r, n1, n0, d) \
1140   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1141 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1142 "       tst     %%g0\n"                                                 \
1143 "       divscc  %3,%4,%%g1\n"                                           \
1144 "       divscc  %%g1,%4,%%g1\n"                                         \
1145 "       divscc  %%g1,%4,%%g1\n"                                         \
1146 "       divscc  %%g1,%4,%%g1\n"                                         \
1147 "       divscc  %%g1,%4,%%g1\n"                                         \
1148 "       divscc  %%g1,%4,%%g1\n"                                         \
1149 "       divscc  %%g1,%4,%%g1\n"                                         \
1150 "       divscc  %%g1,%4,%%g1\n"                                         \
1151 "       divscc  %%g1,%4,%%g1\n"                                         \
1152 "       divscc  %%g1,%4,%%g1\n"                                         \
1153 "       divscc  %%g1,%4,%%g1\n"                                         \
1154 "       divscc  %%g1,%4,%%g1\n"                                         \
1155 "       divscc  %%g1,%4,%%g1\n"                                         \
1156 "       divscc  %%g1,%4,%%g1\n"                                         \
1157 "       divscc  %%g1,%4,%%g1\n"                                         \
1158 "       divscc  %%g1,%4,%%g1\n"                                         \
1159 "       divscc  %%g1,%4,%%g1\n"                                         \
1160 "       divscc  %%g1,%4,%%g1\n"                                         \
1161 "       divscc  %%g1,%4,%%g1\n"                                         \
1162 "       divscc  %%g1,%4,%%g1\n"                                         \
1163 "       divscc  %%g1,%4,%%g1\n"                                         \
1164 "       divscc  %%g1,%4,%%g1\n"                                         \
1165 "       divscc  %%g1,%4,%%g1\n"                                         \
1166 "       divscc  %%g1,%4,%%g1\n"                                         \
1167 "       divscc  %%g1,%4,%%g1\n"                                         \
1168 "       divscc  %%g1,%4,%%g1\n"                                         \
1169 "       divscc  %%g1,%4,%%g1\n"                                         \
1170 "       divscc  %%g1,%4,%%g1\n"                                         \
1171 "       divscc  %%g1,%4,%%g1\n"                                         \
1172 "       divscc  %%g1,%4,%%g1\n"                                         \
1173 "       divscc  %%g1,%4,%%g1\n"                                         \
1174 "       divscc  %%g1,%4,%0\n"                                           \
1175 "       rd      %%y,%1\n"                                               \
1176 "       bl,a 1f\n"                                                      \
1177 "       add     %1,%4,%1\n"                                             \
1178 "1:     ! End of inline udiv_qrnnd"                                     \
1179            : "=&r" ((USItype) (q)),                                     \
1180              "=&r" ((USItype) (r))                                      \
1181            : "r" ((USItype) (n1)),                                      \
1182              "r" ((USItype) (n0)),                                      \
1183              "rI" ((USItype) (d))                                       \
1184            : "g1" __AND_CLOBBER_CC)
1185 #define UDIV_TIME 37
/* SPARClite count_leading_zeros: store the result of "scan X,1" in COUNT.
   See the comment that follows for the behaviour on a zero argument.  */
1186 #define count_leading_zeros(count, x) \
1187   do {                                                                  \
1188   __asm__ ("scan %1,1,%0"                                               \
1189            : "=r" ((USItype) (count))                                   \
1190            : "r" ((USItype) (x)));                                      \
1191   } while (0)
1192 /* Early sparclites return 63 for an argument of 0, but they warn that future
1193    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1194    undefined.  */
1195 #else
1196 /* SPARC without integer multiplication and divide instructions.
1197    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* SPARC umul_ppmm built from multiply-step instructions: U is written to
   the Y register, %o5 is set to U masked by the sign of V
   ("sra %3,31" then "and"), and %g1 and the condition codes are cleared.
   32 "mulscc" steps with V and one final step with 0 follow; W1 is then
   %g1 + %o5 and W0 is read back from Y.  %g1 and %o5 are declared
   clobbered.  */
1198 #define umul_ppmm(w1, w0, u, v) \
1199   __asm__ ("! Inlined umul_ppmm\n"                                      \
1200 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1201 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1202 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1203 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,%3,%%g1\n"                                         \
1214 "       mulscc  %%g1,%3,%%g1\n"                                         \
1215 "       mulscc  %%g1,%3,%%g1\n"                                         \
1216 "       mulscc  %%g1,%3,%%g1\n"                                         \
1217 "       mulscc  %%g1,%3,%%g1\n"                                         \
1218 "       mulscc  %%g1,%3,%%g1\n"                                         \
1219 "       mulscc  %%g1,%3,%%g1\n"                                         \
1220 "       mulscc  %%g1,%3,%%g1\n"                                         \
1221 "       mulscc  %%g1,%3,%%g1\n"                                         \
1222 "       mulscc  %%g1,%3,%%g1\n"                                         \
1223 "       mulscc  %%g1,%3,%%g1\n"                                         \
1224 "       mulscc  %%g1,%3,%%g1\n"                                         \
1225 "       mulscc  %%g1,%3,%%g1\n"                                         \
1226 "       mulscc  %%g1,%3,%%g1\n"                                         \
1227 "       mulscc  %%g1,%3,%%g1\n"                                         \
1228 "       mulscc  %%g1,%3,%%g1\n"                                         \
1229 "       mulscc  %%g1,%3,%%g1\n"                                         \
1230 "       mulscc  %%g1,%3,%%g1\n"                                         \
1231 "       mulscc  %%g1,%3,%%g1\n"                                         \
1232 "       mulscc  %%g1,%3,%%g1\n"                                         \
1233 "       mulscc  %%g1,%3,%%g1\n"                                         \
1234 "       mulscc  %%g1,%3,%%g1\n"                                         \
1235 "       mulscc  %%g1,%3,%%g1\n"                                         \
1236 "       mulscc  %%g1,0,%%g1\n"                                          \
1237 "       add     %%g1,%%o5,%0\n"                                         \
1238 "       rd      %%y,%1"                                                 \
1239            : "=r" ((USItype) (w1)),                                     \
1240              "=r" ((USItype) (w0))                                      \
1241            : "%rI" ((USItype) (u)),                                     \
1242              "r" ((USItype) (v))                                                \
1243            : "g1", "o5" __AND_CLOBBER_CC)
1244 #define UMUL_TIME 39            /* 39 instructions */
1245 /* It's quite necessary to add this much assembler for the sparc.
1246    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* SPARC udiv_qrnnd as a bit-serial loop.  __Q shares its register with
   __N0 (constraint "0") and __R with __N1 (constraint "1"); %g1 is the
   loop counter, initialised to 32.  Each pass shifts the __Q register
   left with "addxcc"/"addcc" while __D is conditionally subtracted from
   the __R register; the final "xnor %0,0,%0" complements the bits
   collected in __Q.  %g1 is declared clobbered.  */
1247 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1248   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1249 "       mov     32,%%g1\n"                                              \
1250 "       subcc   %1,%2,%%g0\n"                                           \
1251 "1:     bcs     5f\n"                                                   \
1252 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1253 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1254 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1255 "       subcc   %%g1,1,%%g1\n"                                          \
1256 "2:     bne     1b\n"                                                   \
1257 "        subcc  %1,%2,%%g0\n"                                           \
1258 "       bcs     3f\n"                                                   \
1259 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1260 "       b       3f\n"                                                   \
1261 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1262 "4:     sub     %1,%2,%1\n"                                             \
1263 "5:     addxcc  %1,%1,%1\n"                                             \
1264 "       bcc     2b\n"                                                   \
1265 "        subcc  %%g1,1,%%g1\n"                                          \
1266 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1267 "       bne     4b\n"                                                   \
1268 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1269 "       sub     %1,%2,%1\n"                                             \
1270 "3:     xnor    %0,0,%0\n"                                              \
1271 "       ! End of inline udiv_qrnnd"                                     \
1272            : "=&r" ((USItype) (__q)),                                   \
1273              "=&r" ((USItype) (__r))                                    \
1274            : "r" ((USItype) (__d)),                                     \
1275              "1" ((USItype) (__n1)),                                    \
1276              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1277 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1278 #endif /* __sparclite__ */
1279 #endif /* __sparc_v8__ */
1280 #endif /* sparc32 */
1281
1282 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1283     && W_TYPE_SIZE == 64
/* 64-bit SPARC add_ssaaaa: "addcc" adds AL and BL into SL, "add" adds AH
   and BH into SH, and "bcs,a,pn %xcc" branches over nothing but carries
   "add %0, 1, %0" in its delay slot, so SH is incremented when the
   64-bit carry is set (presumably the annul bit suppresses the increment
   otherwise -- confirm against the SPARC v9 manual).  */
1284 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1285   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1286            "add %r2,%3,%0\n\t"                                          \
1287            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1288            "add %0, 1, %0\n"                                            \
1289            "1:"                                                         \
1290            : "=r" ((UDItype)(sh)),                                      \
1291              "=&r" ((UDItype)(sl))                                      \
1292            : "%rJ" ((UDItype)(ah)),                                     \
1293              "rI" ((UDItype)(bh)),                                      \
1294              "%rJ" ((UDItype)(al)),                                     \
1295              "rI" ((UDItype)(bl))                                       \
1296            __CLOBBER_CC)
1297
/* 64-bit SPARC sub_ddmmss: "subcc" computes AL - BL into SL, "sub"
   computes AH - BH into SH, and "bcs,a,pn %xcc" carries
   "sub %0, 1, %0" in its delay slot, so SH is decremented when the
   64-bit borrow is set (presumably the annul bit suppresses the
   decrement otherwise -- confirm against the SPARC v9 manual).  */
1298 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1299   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1300            "sub %r2,%3,%0\n\t"                                          \
1301            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1302            "sub %0, 1, %0\n\t"                                          \
1303            "1:"                                                         \
1304            : "=r" ((UDItype)(sh)),                                      \
1305              "=&r" ((UDItype)(sl))                                      \
1306            : "rJ" ((UDItype)(ah)),                                      \
1307              "rI" ((UDItype)(bh)),                                      \
1308              "rJ" ((UDItype)(al)),                                      \
1309              "rI" ((UDItype)(bl))                                       \
1310            __CLOBBER_CC)
1311
/* 64-bit SPARC umul_ppmm: U and V are split into 32-bit halves with
   srl/srlx and the partial products are formed with "mulx", using four
   scratch registers (tmp1..tmp4).  WL is the register accumulated as %1
   and WH is produced by the final "add %5,%2,%0".
   NOTE(review): the carry propagation through the sethi/movcc sequence
   has not been re-derived for this comment.  */
1312 #define umul_ppmm(wh, wl, u, v)                                         \
1313   do {                                                                  \
1314           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1315           __asm__ __volatile__ (                                        \
1316                    "srl %7,0,%3\n\t"                                    \
1317                    "mulx %3,%6,%1\n\t"                                  \
1318                    "srlx %6,32,%2\n\t"                                  \
1319                    "mulx %2,%3,%4\n\t"                                  \
1320                    "sllx %4,32,%5\n\t"                                  \
1321                    "srl %6,0,%3\n\t"                                    \
1322                    "sub %1,%5,%5\n\t"                                   \
1323                    "srlx %5,32,%5\n\t"                                  \
1324                    "addcc %4,%5,%4\n\t"                                 \
1325                    "srlx %7,32,%5\n\t"                                  \
1326                    "mulx %3,%5,%3\n\t"                                  \
1327                    "mulx %2,%5,%5\n\t"                                  \
1328                    "sethi %%hi(0x80000000),%2\n\t"                      \
1329                    "addcc %4,%3,%4\n\t"                                 \
1330                    "srlx %4,32,%4\n\t"                                  \
1331                    "add %2,%2,%2\n\t"                                   \
1332                    "movcc %%xcc,%%g0,%2\n\t"                            \
1333                    "addcc %5,%4,%5\n\t"                                 \
1334                    "sllx %3,32,%3\n\t"                                  \
1335                    "add %1,%3,%1\n\t"                                   \
1336                    "add %5,%2,%0"                                       \
1337            : "=r" ((UDItype)(wh)),                                      \
1338              "=&r" ((UDItype)(wl)),                                     \
1339              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1340            : "r" ((UDItype)(u)),                                        \
1341              "r" ((UDItype)(v))                                         \
1342            __CLOBBER_CC);                                               \
1343   } while (0)
1344 #define UMUL_TIME 96
1345 #define UDIV_TIME 230
1346 #endif /* sparc64 */
1347
1348 #if defined (__vax__) && W_TYPE_SIZE == 32
/* VAX add_ssaaaa: AH and AL are tied to the outputs SH and SL; "addl2"
   adds BL to SL and "adwc" then adds BH to SH (presumably together with
   the carry from the first add).  */
1349 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1350   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1351            : "=g" ((USItype) (sh)),                                     \
1352              "=&g" ((USItype) (sl))                                     \
1353            : "%0" ((USItype) (ah)),                                     \
1354              "g" ((USItype) (bh)),                                      \
1355              "%1" ((USItype) (al)),                                     \
1356              "g" ((USItype) (bl)))
/* VAX sub_ddmmss: AH and AL are tied to the outputs SH and SL; "subl2"
   subtracts BL from SL and "sbwc" then subtracts BH from SH (presumably
   together with the borrow from the first subtraction).  */
1357 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1358   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1359            : "=g" ((USItype) (sh)),                                     \
1360              "=&g" ((USItype) (sl))                                     \
1361            : "0" ((USItype) (ah)),                                      \
1362              "g" ((USItype) (bh)),                                      \
1363              "1" ((USItype) (al)),                                      \
1364              "g" ((USItype) (bl)))
/* VAX umul_ppmm: "emul" multiplies M0 by M1 with addend 0 into the 64-bit
   union member __ll, whose halves are copied to XH (__h) and XL (__l).
   XH is then increased by M1 when the sign bit of M0 is set and by M0
   when the sign bit of M1 is set -- presumably the correction that turns
   a signed product's high word into the unsigned one.  */
1365 #define umul_ppmm(xh, xl, m0, m1) \
1366   do {                                                                  \
1367     union {                                                             \
1368         UDItype __ll;                                                   \
1369         struct {USItype __l, __h;} __i;                                 \
1370       } __xx;                                                           \
1371     USItype __m0 = (m0), __m1 = (m1);                                   \
1372     __asm__ ("emul %1,%2,$0,%0"                                         \
1373              : "=r" (__xx.__ll)                                         \
1374              : "g" (__m0),                                              \
1375                "g" (__m1));                                             \
1376     (xh) = __xx.__i.__h;                                                \
1377     (xl) = __xx.__i.__l;                                                \
1378     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1379              + (((SItype) __m1 >> 31) & __m0));                         \
1380   } while (0)
/* VAX sdiv_qrnnd: pack N1 (high) and N0 (low) into a 64-bit union and
   hand it with D to "ediv", which writes Q and R.  N1 and N0 are
   parenthesized so that an expression argument is assigned as a whole.  */
1381 #define sdiv_qrnnd(q, r, n1, n0, d) \
1382   do {                                                                  \
1383     union {DItype __ll;                                                 \
1384            struct {SItype __l, __h;} __i;                               \
1385           } __xx;                                                       \
1386     __xx.__i.__h = (n1); __xx.__i.__l = (n0);                           \
1387     __asm__ ("ediv %3,%2,%0,%1"                                         \
1388              : "=g" (q), "=g" (r)                                       \
1389              : "g" (__xx.__ll), "g" (d));                               \
1390   } while (0)
1391 #endif /* __vax__ */
1392
1393 #ifdef _TMS320C6X
/* TMS320C6X add_ssaaaa: "addu" adds AL and BL into the 64-bit temporary
   __ll.  SL takes the low 32 bits of __ll; SH is the upper 32 bits of
   __ll plus AH plus BH.  */
1394 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1395   do                                                                    \
1396     {                                                                   \
1397       UDItype __ll;                                                     \
1398       __asm__ ("addu .l1 %1, %2, %0"                                    \
1399                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1400       (sl) = (USItype)__ll;                                             \
1401       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1402     }                                                                   \
1403   while (0)
1404
1405 #ifdef _TMS320C6400_PLUS
/* Product of U and V, each converted to USItype, computed in UDItype.
   The arguments are parenthesized so that the casts apply to the whole
   argument expression rather than only to its first operand.  */
1406 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* TMS320C6400+ umul_ppmm: form the product of U and V (each converted to
   USItype) in a UDItype, then store its upper 32 bits in W1 and its
   lower 32 bits in W0.  */
1407 #define umul_ppmm(w1, w0, u, v)                                         \
1408   do {                                                                  \
1409     UDItype __x = (USItype) (u);                                        \
1410     __x *= (USItype) (v);                                               \
1411     (w1) = (USItype) (__x >> 32); (w0) = (USItype) __x;                 \
1412   } while (0)
1413 #endif  /* _TMS320C6400_PLUS */
1414
/* TMS320C6X bit counting: count_leading_zeros stores __builtin_clz (X)
   in COUNT; count_trailing_zeros, provided only when _TMS320C6400 is
   defined, stores __builtin_ctz (X) in COUNT.  */
1415 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1416 #ifdef _TMS320C6400
1417 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1418 #endif
1419 #define UMUL_TIME 4
1420 #define UDIV_TIME 40
1421 #endif /* _TMS320C6X */
1422
1423 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1424 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1425    to expand builtin functions depending on what configuration features
1426    are available.  This avoids library calls when the operation can be
1427    performed in-line.  */
/* Xtensa umul_ppmm: store __builtin_umulsidi3 (U, V) in a DWunion and
   copy its s.high member to W1 and its s.low member to W0.  The output
   arguments are parenthesized, as in the other umul_ppmm definitions in
   this file.  */
1428 #define umul_ppmm(w1, w0, u, v)                                         \
1429   do {                                                                  \
1430     DWunion __w;                                                        \
1431     __w.ll = __builtin_umulsidi3 (u, v);                                \
1432     (w1) = __w.s.high;                                                  \
1433     (w0) = __w.s.low;                                                   \
1434   } while (0)
/* Xtensa: __umulsidi3 forwards to __builtin_umulsidi3; count_leading_zeros
   and count_trailing_zeros store __builtin_clz (X) and __builtin_ctz (X)
   in COUNT respectively.  */
1435 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1436 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1437 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1438 #endif /* __xtensa__ */
1439
1440 #if defined xstormy16
/* xstormy16 count_leading_zeros: walk X in 16-bit chunks starting from the
   most significant one, add __clzhi2 of each chunk to COUNT, and stop at
   the first chunk whose result is not 16.  SIZE starts at W_TYPE_SIZE and
   drops by 16 per pass, so the loop also ends once every chunk has been
   examined.
   NOTE(review): the prototype below declares
   __stormy16_count_leading_zeros, but the macro calls __clzhi2, for which
   no declaration is visible here -- confirm which name is intended.  */
1441 extern UHItype __stormy16_count_leading_zeros (UHItype);
1442 #define count_leading_zeros(count, x)                                   \
1443   do                                                                    \
1444     {                                                                   \
1445       UHItype size;                                                     \
1446                                                                         \
1447       /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
1448       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)           \
1449         {                                                               \
1450           UHItype c;                                                    \
1451                                                                         \
1452           c = __clzhi2 ((x) >> (size - 16));                            \
1453           (count) += c;                                                 \
1454           if (c != 16)                                                  \
1455             break;                                                      \
1456         }                                                               \
1457     }                                                                   \
1458   while (0)
1459 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1460 #endif
1461
1462 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Z8000 add_ssaaaa (16-bit words): AH and AL are tied to the outputs SH
   and SL; "add" adds BL to SL and "adc" then adds BH to SH (presumably
   together with the carry from the first add).  */
1463 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1464   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1465            : "=r" ((unsigned int)(sh)),                                 \
1466              "=&r" ((unsigned int)(sl))                                 \
1467            : "%0" ((unsigned int)(ah)),                                 \
1468              "r" ((unsigned int)(bh)),                                  \
1469              "%1" ((unsigned int)(al)),                                 \
1470              "rQR" ((unsigned int)(bl)))
/* Z8000 sub_ddmmss (16-bit words): AH and AL are tied to the outputs SH
   and SL; "sub" subtracts BL from SL and "sbc" then subtracts BH from SH
   (presumably together with the borrow from the first subtraction).  */
1471 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1472   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1473            : "=r" ((unsigned int)(sh)),                                 \
1474              "=&r" ((unsigned int)(sl))                                 \
1475            : "0" ((unsigned int)(ah)),                                  \
1476              "r" ((unsigned int)(bh)),                                  \
1477              "1" ((unsigned int)(al)),                                  \
1478              "rQR" ((unsigned int)(bl)))
/* Z8000 umul_ppmm: "mult" works on the register pair backing the union
   members __h and __l, with M0 tied to __l and M1 as the other operand;
   XH and XL are then copied from __h and __l.  XH is increased by M1
   when bit 15 of M0 is set and by M0 when bit 15 of M1 is set --
   presumably the correction that turns a signed product's high word into
   the unsigned one.  */
1479 #define umul_ppmm(xh, xl, m0, m1) \
1480   do {                                                                  \
1481     union {long int __ll;                                               \
1482            struct {unsigned int __h, __l;} __i;                         \
1483           } __xx;                                                       \
1484     unsigned int __m0 = (m0), __m1 = (m1);                              \
1485     __asm__ ("mult      %S0,%H3"                                        \
1486              : "=r" (__xx.__i.__h),                                     \
1487                "=r" (__xx.__i.__l)                                      \
1488              : "%1" (__m0),                                             \
1489                "rQR" (__m1));                                           \
1490     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1491     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1492              + (((signed int) __m1 >> 15) & __m0));                     \
1493   } while (0)
1494 #endif /* __z8000__ */
1495
1496 #endif /* __GNUC__ */
1497
1498 /* If this machine has no inline assembler, use C macros.  */
1499
/* Portable add_ssaaaa: (SH,SL) = (AH,AL) + (BH,BL).  The low words are
   added first; if the UWtype sum wrapped around it is smaller than AL,
   and that comparison supplies the carry into the high word.  SH is
   written before SL, so SL may name the same object as AL.  */
1500 #ifndef add_ssaaaa
1501 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1502   do {                                                                  \
1503     /* Low-word sum; wrap-around is detected by comparing with AL.  */  \
1504     UWtype __x = (al) + (bl);                                           \
1505     (sh) = (ah) + (bh) + ((__x < (al)) ? 1 : 0);                        \
1506     (sl) = __x;                                                         \
1507   } while (0)
1508 #endif
1509
/* Portable sub_ddmmss: (SH,SL) = (AH,AL) - (BH,BL).  The low words are
   subtracted first; if the UWtype difference wrapped around it is larger
   than AL, and that comparison supplies the borrow taken from the high
   word.  SH is written before SL, so SL may name the same object as
   AL.  */
1510 #ifndef sub_ddmmss
1511 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1512   do {                                                                  \
1513     /* Low-word difference; wrap-around is detected against AL.  */     \
1514     UWtype __x = (al) - (bl);                                           \
1515     (sh) = (ah) - (bh) - ((__x > (al)) ? 1 : 0);                        \
1516     (sl) = __x;                                                         \
1517   } while (0)
1518 #endif
1519
1520 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1521    smul_ppmm.  */
/* umul_ppmm on top of smul_ppmm: take the high word delivered by
   smul_ppmm and add V when the top bit of U is set and U when the top
   bit of V is set (each mask is all-ones or zero).  The low word W0 is
   passed straight through to smul_ppmm.  */
1522 #if defined (smul_ppmm) && !defined (umul_ppmm)
1523 #define umul_ppmm(w1, w0, u, v)                                         \
1524   do {                                                                  \
1525     /* Copy the operands once so each is evaluated a single time.  */   \
1526     UWtype __w1, __xm0 = (u), __xm1 = (v);                              \
1527     smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
1528     (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
1529                 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
1530   } while (0)
1531 #endif
1532
1533 /* If we still don't have umul_ppmm, define it using plain C.  */
/* Plain C umul_ppmm: U and V are split into half-words with __ll_lowpart
   and __ll_highpart, the four partial products are formed in UWtype, and
   the two middle products are summed with an explicit carry check
   (__ll_B is added to the high product when the sum wraps).  W1 receives
   the high word and W0 the low word of the result.  __ll_lowpart,
   __ll_highpart and __ll_B are defined outside this excerpt.  */
1534 #if !defined (umul_ppmm)
1535 #define umul_ppmm(w1, w0, u, v)                                         \
1536   do {                                                                  \
1537     UWtype __x0, __x1, __x2, __x3;                                      \
1538     UHWtype __ul, __vl, __uh, __vh;                                     \
1539                                                                         \
1540     __ul = __ll_lowpart (u);                                            \
1541     __uh = __ll_highpart (u);                                           \
1542     __vl = __ll_lowpart (v);                                            \
1543     __vh = __ll_highpart (v);                                           \
1544                                                                         \
1545     __x0 = (UWtype) __ul * __vl;                                        \
1546     __x1 = (UWtype) __ul * __vh;                                        \
1547     __x2 = (UWtype) __uh * __vl;                                        \
1548     __x3 = (UWtype) __uh * __vh;                                        \
1549                                                                         \
1550     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
1551     __x1 += __x2;               /* but this indeed can */               \
1552     if (__x1 < __x2)            /* did we get it? */                    \
1553       __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
1554                                                                         \
1555     (w1) = __x3 + __ll_highpart (__x1);                                 \
1556     (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
1557   } while (0)
1558 #endif
1559
/* __umulsidi3 (u, v): unless already provided, a statement expression
   that has umul_ppmm store the product of U and V into the s.high and
   s.low members of a DWunion and then yields that union's ll member.  */
#ifndef __umulsidi3
#define __umulsidi3(u, v)                                               \
  ({                                                                    \
    DWunion __dw_prod;                                                  \
    umul_ppmm (__dw_prod.s.high, __dw_prod.s.low, u, v);                \
    __dw_prod.ll;                                                       \
  })
#endif
1566
/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d): plain C division of the two-word
   value (N1,N0) by D, producing the quotient one half-word digit at a
   time.  Q receives the quotient and R the remainder.  N1, N0 and D are
   evaluated more than once.
   NOTE(review): presumably requires D to have its top bit set and
   N1 < D -- confirm against the callers.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d)                                 \
  do {                                                                  \
    UWtype __dv_hi, __dv_lo;                                            \
    UWtype __qd_hi, __qd_lo;                                            \
    UWtype __rm_hi, __rm_lo;                                            \
    UWtype __corr;                                                      \
                                                                        \
    __dv_hi = __ll_highpart (d);                                        \
    __dv_lo = __ll_lowpart (d);                                         \
                                                                        \
    /* High quotient digit: estimate from D's high half, then fix up.  */ \
    __rm_hi = (n1) % __dv_hi;                                           \
    __qd_hi = (n1) / __dv_hi;                                           \
    __corr = (UWtype) __qd_hi * __dv_lo;                                \
    __rm_hi = __rm_hi * __ll_B | __ll_highpart (n0);                    \
    if (__rm_hi < __corr)                                               \
      {                                                                 \
        __qd_hi--;                                                      \
        __rm_hi += (d);                                                 \
        /* No carry out of that addition, yet still short: once more.  */ \
        if (__rm_hi >= (d) && __rm_hi < __corr)                         \
          {                                                             \
            __qd_hi--;                                                  \
            __rm_hi += (d);                                             \
          }                                                             \
      }                                                                 \
    __rm_hi -= __corr;                                                  \
                                                                        \
    /* Low quotient digit: same steps on the running remainder.  */     \
    __rm_lo = __rm_hi % __dv_hi;                                        \
    __qd_lo = __rm_hi / __dv_hi;                                        \
    __corr = (UWtype) __qd_lo * __dv_lo;                                \
    __rm_lo = __rm_lo * __ll_B | __ll_lowpart (n0);                     \
    if (__rm_lo < __corr)                                               \
      {                                                                 \
        __qd_lo--;                                                      \
        __rm_lo += (d);                                                 \
        if (__rm_lo >= (d) && __rm_lo < __corr)                         \
          {                                                             \
            __qd_lo--;                                                  \
            __rm_lo += (d);                                             \
          }                                                             \
      }                                                                 \
    __rm_lo -= __corr;                                                  \
                                                                        \
    (q) = (UWtype) __qd_hi * __ll_B | __qd_lo;                          \
    (r) = __rm_lo;                                                      \
  } while (0)
1604
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   Q receives the value returned by __udiv_w_sdiv and R the value it
   stores through its first argument.  The temporary for that stored
   value is a UWtype, the word type used by the other generic fallbacks,
   so that it cannot be narrower than what the callee writes.
   NOTE(review): assumes __udiv_w_sdiv is declared as taking a UWtype *
   and returning UWtype -- confirm against its declaration.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1615
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is defined to 1 together with that alias.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1621
/* count_leading_zeros (count, x): table-driven C fallback, used only when
   no earlier definition exists.  A shift amount is chosen so that
   X >> shift indexes __clz_tab; COUNT is then W_TYPE_SIZE minus the sum
   of that table entry and the shift.  For words of at most 32 bits the
   shift is a multiple of __BITS4 picked by comparisons; otherwise it is
   found by stepping down one byte at a time.  COUNT_LEADING_ZEROS_0 is
   defined as W_TYPE_SIZE alongside this fallback.  */
#ifndef count_leading_zeros
#define count_leading_zeros(count, x)                                   \
  do {                                                                  \
    UWtype __clz_v = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        /* Pick the lowest multiple of __BITS4 that bounds the value.  */ \
        if (__clz_v < ((UWtype)1<<__BITS4))                             \
          __clz_sh = 0;                                                 \
        else if (__clz_v < ((UWtype)1<<2*__BITS4))                      \
          __clz_sh = __BITS4;                                           \
        else if (__clz_v < ((UWtype)1<<3*__BITS4))                      \
          __clz_sh = 2*__BITS4;                                         \
        else                                                            \
          __clz_sh = 3*__BITS4;                                         \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Walk down from the top byte to the first nonzero one.  */    \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_v >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_v >> __clz_sh] + __clz_sh); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1645
#ifndef count_trailing_zeros
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   X is reduced to X & -X, count_leading_zeros is applied to that, and
   COUNT is set to W_TYPE_SIZE - 1 minus the result.  */
#define count_trailing_zeros(count, x)                                  \
  do {                                                                  \
    UWtype __ctz_in = (x);                                              \
    /* X & -X keeps only the lowest set bit of X.  */                   \
    UWtype __ctz_low = __ctz_in & -__ctz_in;                            \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_low);                          \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
#endif
1657
/* Default to 0 when nothing earlier has defined
   UDIV_NEEDS_NORMALIZATION.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif