stdlib/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
   36    UDWtype -- An unsigned type, at least twice as large as UWtype
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
   46 #define __BITS4 (W_TYPE_SIZE / 4)
      /* __BITS4 above is a quarter of the word size, in bits.  The remaining
         helpers split a word into halves: __ll_B is 2^(W_TYPE_SIZE/2) as a
         UWtype, so __ll_lowpart (t) keeps the low half of T and
         __ll_highpart (t) shifts the high half down into the low half.  */
   47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
   48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
   49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  50
   51 #ifndef W_TYPE_SIZE
      /* The includer configured nothing: fall back to a 32-bit word.  Note that
         the half-word type defaults to USItype as well, i.e. the same type as
         the full word, while the double word is UDItype.  */
   52 #define W_TYPE_SIZE     32
   53 #define UWtype          USItype
   54 #define UHWtype         USItype
   55 #define UDWtype         UDItype
   56 #endif
  57
   58 /* Used in glibc only.  */
   59 #ifndef attribute_hidden
   60 #define attribute_hidden
   61 #endif
   62
      /* 256-entry byte lookup table, defined outside this header.
         NOTE(review): the non-CIX Alpha count_leading_zeros in this file uses
         __clz_tab[b] as if it were the number of significant bits in byte B --
         confirm against the table's definition.  */
   63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If, in addition, the most
  79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  80    UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
   94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
   96    of HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   98    (i.e. carry out) is not stored anywhere, and is lost.
  99
  100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
  101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
  102    composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
  103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
  104    and LOW_DIFFERENCE.  Overflow (i.e. borrow out) is not stored anywhere,
  105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
  118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
  119    understood by gcc1.  Use cpp to avoid major code duplication.  */
      /* __CLOBBER_CC opens a clobber list (a colon followed by "cc") and is for
         asm statements that have no other clobbers; __AND_CLOBBER_CC appends
         "cc" to an existing list (a comma followed by "cc").  Both expand to
         nothing when __GNUC__ < 2.  */
  120 #if __GNUC__ < 2
  121 #define __CLOBBER_CC
  122 #define __AND_CLOBBER_CC
  123 #else /* __GNUC__ >= 2 */
  124 #define __CLOBBER_CC : "cc"
  125 #define __AND_CLOBBER_CC , "cc"
  126 #endif /* __GNUC__ < 2 */
 127
  128 #if defined (__alpha) && W_TYPE_SIZE == 64
      /* Alpha, 64-bit word.  umul_ppmm copies both operands into locals (so each
         is evaluated once), takes the high word of the product from
         __builtin_alpha_umulh and the low word from an ordinary multiply.  */
  129 #define umul_ppmm(ph, pl, m0, m1) \
  130   do {                                                                  \
  131     UDItype __m0 = (m0), __m1 = (m1);                                   \
  132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
  133     (pl) = __m0 * __m1;                                                 \
  134   } while (0)
  135 #define UMUL_TIME 46
  136 #ifndef LONGLONG_STANDALONE
      /* udiv_qrnnd defers to the out-of-line __udiv_qrnnd declared just below:
         its return value is used as the quotient and the remainder is read
         back from the object passed by address as the first argument.  Only
         provided when LONGLONG_STANDALONE is not defined.  */
  137 #define udiv_qrnnd(q, r, n1, n0, d) \
  138   do { UDItype __r;                                                     \
  139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
  140     (r) = __r;                                                          \
  141   } while (0)
  142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
  143 #define UDIV_TIME 220
  144 #endif /* LONGLONG_STANDALONE */
  145 #ifdef __alpha_cix__
      /* When __alpha_cix__ is defined, use the compiler builtins directly and
         advertise 64 as the count_leading_zeros result for X == 0.  */
  146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
  147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
  148 #define COUNT_LEADING_ZEROS_0 64
  149 #else
      /* Byte-wise fallback.  NOTE(review): presumably cmpbge (0, x) yields one
         bit per zero byte of X -- confirm with the Alpha builtin documentation.
         Under that reading __t ^ 0xff marks the nonzero bytes, __a becomes the
         index of the most significant nonzero byte, extbl fetches that byte,
         and COUNT is 64 minus the bit length of X.  */
  150 #define count_leading_zeros(COUNT,X) \
  151   do {                                                                  \
  152     UDItype __xr = (X), __t, __a;                                       \
  153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
  155     __t = __builtin_alpha_extbl (__xr, __a);                            \
  156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  157   } while (0)
      /* "~__t & -~__t" keeps only the lowest set bit of ~__t.  The three mask
         tests (0xCC, 0xF0, 0xAA; weights 2, 4, 1) then turn that one-hot value
         into its bit index, which is used as a byte index for extbl.  After
         __a is scaled by 8 the same one-hot trick is repeated on the selected
         byte to add the bit position within it.  */
  158 #define count_trailing_zeros(COUNT,X) \
  159   do {                                                                  \
  160     UDItype __xr = (X), __t, __a;                                       \
  161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  162     __t = ~__t & -~__t;                                                 \
  163     __a = ((__t & 0xCC) != 0) * 2;                                      \
  164     __a += ((__t & 0xF0) != 0) * 4;                                     \
  165     __a += ((__t & 0xAA) != 0);                                         \
  166     __t = __builtin_alpha_extbl (__xr, __a);                            \
  167     __a <<= 3;                                                          \
  168     __t &= -__t;                                                        \
  169     __a += ((__t & 0xCC) != 0) * 2;                                     \
  170     __a += ((__t & 0xF0) != 0) * 4;                                     \
  171     __a += ((__t & 0xAA) != 0);                                         \
  172     (COUNT) = __a;                                                      \
  173   } while (0)
  174 #endif /* __alpha_cix__ */
  175 #endif /* __alpha */
 176
  177 #if defined (__arc__) && W_TYPE_SIZE == 32
      /* ARC, 32-bit word.  Two-word add/subtract: the low words (operands 4 and
         5) are combined first into "sl", then the high words (operands 2 and 3)
         into "sh" with adc/sbc.  "sl" is earlyclobber ("=&r") because it is
         written before the high-word inputs are read.  NOTE(review): the ".f"
         suffix presumably makes the first instruction set the carry flag, and
         no "cc" clobber is declared here -- confirm both.  */
  178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
  180            : "=r" ((USItype) (sh)),                                     \
  181              "=&r" ((USItype) (sl))                                     \
  182            : "%r" ((USItype) (ah)),                                     \
  183              "rIJ" ((USItype) (bh)),                                    \
  184              "%r" ((USItype) (al)),                                     \
  185              "rIJ" ((USItype) (bl)))
  186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
  188            : "=r" ((USItype) (sh)),                                     \
  189              "=&r" ((USItype) (sl))                                     \
  190            : "r" ((USItype) (ah)),                                      \
  191              "rIJ" ((USItype) (bh)),                                    \
  192              "r" ((USItype) (al)),                                      \
  193              "rIJ" ((USItype) (bl)))
  194 /* Call libgcc routine.  */
      /* umul_ppmm obtains the double-word product from __umulsidi3 and splits it
         through a DWunion (a type declared outside this header) into its
         s.high and s.low members.  The w1/w0 arguments are used unparenthesized,
         so callers should pass plain lvalues.  Defining __umulsidi3 as a macro
         that expands to itself makes "#ifdef __umulsidi3" tests succeed while
         still calling the function prototyped below.  */
  195 #define umul_ppmm(w1, w0, u, v) \
  196 do {                                                                    \
  197   DWunion __w;                                                          \
  198   __w.ll = __umulsidi3 (u, v);                                          \
  199   w1 = __w.s.high;                                                      \
  200   w0 = __w.s.low;                                                       \
  201 } while (0)
  202 #define __umulsidi3 __umulsidi3
  203 UDItype __umulsidi3 (USItype, USItype);
  204 #endif
 205
  206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
      /* ARM (not Thumb), 32-bit word.  Two-word add/subtract: the low words
         (operands 4 and 5) are combined first with adds/subs into "sl", then
         the high words (operands 2 and 3) follow with adc/sbc into "sh".  "sl"
         is earlyclobber ("=&r") because it is written before the high-word
         inputs are read; __CLOBBER_CC declares the condition codes clobbered.
         The "%" on the add inputs marks them as commutative with the following
         operand; the subtract has no such marker.  */
  207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
  209            : "=r" ((USItype) (sh)),                                     \
  210              "=&r" ((USItype) (sl))                                     \
  211            : "%r" ((USItype) (ah)),                                     \
  212              "rI" ((USItype) (bh)),                                     \
  213              "%r" ((USItype) (al)),                                     \
  214              "rI" ((USItype) (bl)) __CLOBBER_CC)
  215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
  217            : "=r" ((USItype) (sh)),                                     \
  218              "=&r" ((USItype) (sl))                                     \
  219            : "r" ((USItype) (ah)),                                      \
  220              "rI" ((USItype) (bh)),                                     \
  221              "r" ((USItype) (al)),                                      \
  222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 #define umul_ppmm(xh, xl, a, b) \
 224 {register USItype __t0, __t1, __t2;                                     \
 225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 226            "    mov     %2, %5, lsr #16\n"                              \
 227            "    mov     %0, %6, lsr #16\n"                              \
 228            "    bic     %3, %5, %2, lsl #16\n"                          \
 229            "    bic     %4, %6, %0, lsl #16\n"                          \
 230            "    mul     %1, %3, %4\n"                                   \
 231            "    mul     %4, %2, %4\n"                                   \
 232            "    mul     %3, %0, %3\n"                                   \
 233            "    mul     %0, %2, %0\n"                                   \
 234            "    adds    %3, %4, %3\n"                                   \
 235            "    addcs   %0, %0, #65536\n"                               \
 236            "    adds    %1, %1, %3, lsl #16\n"                          \
 237            "    adc     %0, %0, %3, lsr #16"                            \
 238            : "=&r" ((USItype) (xh)),                                    \
 239              "=r" ((USItype) (xl)),                                     \
 240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 241            : "r" ((USItype) (a)),                                       \
 242              "r" ((USItype) (b)) __CLOBBER_CC );}
  243 #define UMUL_TIME 20
      /* NOTE(review): UMUL_TIME and UDIV_TIME look like relative cost estimates
         for the multiply and divide primitives; nothing in view states the
         unit or who consumes them -- confirm.  */
  244 #define UDIV_TIME 100
  245 #endif /* __arm__ */
 246
  247 #if defined(__arm__)
  248 /* Let gcc decide how best to implement count_leading_zeros.  */
      /* Unlike the ARM block above, this guard covers every __arm__ target
         (Thumb included) and does not test W_TYPE_SIZE.  COUNT_LEADING_ZEROS_0
         advertises 32 as the result for X == 0.  NOTE(review): GCC documents
         __builtin_clz (0) as undefined -- confirm that 32 is reliable on all
         supported cores.  */
  249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
  250 #define COUNT_LEADING_ZEROS_0 32
  251 #endif
 252
  253 #if defined (__CRIS__) && __CRIS_arch_version >= 3
      /* CRIS: count_leading_zeros maps to __builtin_clz from architecture
         version 3 on; count_trailing_zeros maps to __builtin_ctz only from
         version 8 on.  No COUNT_LEADING_ZEROS_0 is defined here, so the
         X == 0 case stays undefined.  */
  254 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
  255 #if __CRIS_arch_version >= 8
  256 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
  257 #endif
  258 #endif /* __CRIS__ */
 259
  260 #if defined (__hppa) && W_TYPE_SIZE == 32
      /* HP PA-RISC, 32-bit word.  Two-word add/subtract: the low words
         (operands 4 and 5) are combined first into "sl" (earlyclobber), then
         addc/subb combine the high words (operands 2 and 3) into "sh".  The
         destination is the last operand of each instruction.  */
  261 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  262   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
  263            : "=r" ((USItype) (sh)),                                     \
  264              "=&r" ((USItype) (sl))                                     \
  265            : "%rM" ((USItype) (ah)),                                    \
  266              "rM" ((USItype) (bh)),                                     \
  267              "%rM" ((USItype) (al)),                                    \
  268              "rM" ((USItype) (bl)))
  269 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  270   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
  271            : "=r" ((USItype) (sh)),                                     \
  272              "=&r" ((USItype) (sl))                                     \
  273            : "rM" ((USItype) (ah)),                                     \
  274              "rM" ((USItype) (bh)),                                     \
  275              "rM" ((USItype) (al)),                                     \
  276              "rM" ((USItype) (bl)))
  277 #if defined (_PA_RISC1_1)
      /* With _PA_RISC1_1 a single xmpyu produces the full product into a
         UDItype, which is then read back through the union as two USItype
         words, __w1 (declared first) as the high word and __w0 as the low
         word.  Without it no umul_ppmm is defined here and only the slower
         UMUL_TIME is recorded.  */
  278 #define umul_ppmm(w1, w0, u, v) \
  279   do {                                                                  \
  280     union                                                               \
  281       {                                                                 \
  282         UDItype __f;                                                    \
  283         struct {USItype __w1, __w0;} __w1w0;                            \
  284       } __t;                                                            \
  285     __asm__ ("xmpyu %1,%2,%0"                                           \
  286              : "=x" (__t.__f)                                           \
  287              : "x" ((USItype) (u)),                                     \
  288                "x" ((USItype) (v)));                                    \
  289     (w1) = __t.__w1w0.__w1;                                             \
  290     (w0) = __t.__w1w0.__w0;                                             \
  291      } while (0)
  292 #define UMUL_TIME 8
  293 #else
  294 #define UMUL_TIME 30
  295 #endif
  296 #define UDIV_TIME 40
      /* Binary search over the word, as the per-instruction comments describe:
         the count starts at 1, and for each field width (16, 8, 4, 2) either
         the width is added to the count (upper field zero) or the upper field
         is shifted down; the final step subtracts bit 1 of what remains.
         __tmp is tied to the input through the "1" matching constraint, so
         the shifting happens in a scratch copy rather than in X.  */
  297 #define count_leading_zeros(count, x) \
  298   do {                                                                  \
  299     USItype __tmp;                                                      \
  300     __asm__ (                                                           \
  301        "ldi             1,%0\n"                                         \
  302 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
  303 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
  304 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
  305 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
  306 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
  307 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
  308 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
  309 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
  310 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
  311 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
  312 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
  313 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
  314 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
  315 "       sub             %0,%1,%0                ; Subtract it.\n"       \
  316         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  317   } while (0)
  318 #endif
 319
  320 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
  321 #if !defined (__zarch__)
      /* Non-z/Architecture variant: the 64-bit operand lives in a DItype that
         the asm treats as a register pair and that is read back through the
         union as __h (high) and __l (low).  NOTE(review): %N0 presumably names
         the second register of the pair holding operand 0 -- confirm against
         the s390 backend documentation.  smul_ppmm copies m0 there and
         multiplies by m1; the output is earlyclobber because it is written
         before m1 is read.  */
  322 #define smul_ppmm(xh, xl, m0, m1) \
  323   do {                                                                  \
  324     union {DItype __ll;                                                 \
  325            struct {USItype __h, __l;} __i;                              \
  326           } __x;                                                        \
  327     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
  328              : "=&r" (__x.__ll)                                         \
  329              : "r" (m0), "r" (m1));                                     \
  330     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
  331   } while (0)
      /* sdiv_qrnnd loads n1:n0 into the pair, divides by d in place, and then
         takes the quotient from the low half and the remainder from the high
         half.  */
  332 #define sdiv_qrnnd(q, r, n1, n0, d) \
  333   do {                                                                  \
  334     union {DItype __ll;                                                 \
  335            struct {USItype __h, __l;} __i;                              \
  336           } __x;                                                        \
  337     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
  338     __asm__ ("dr %0,%2"                                                 \
  339              : "=r" (__x.__ll)                                          \
  340              : "0" (__x.__ll), "r" (d));                                \
  341     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
  342   } while (0)
  343 #else
      /* z/Architecture variant: instead of a DItype pair, the two halves are
         explicit register variables bound to registers "0" and "1", and the
         instruction names %r0 directly.  Results are read from __r0 (high
         product / remainder) and __r1 (low product / quotient).  */
  344 #define smul_ppmm(xh, xl, m0, m1) \
  345   do {                                                                  \
  346     register SItype __r0 __asm__ ("0");                                 \
  347     register SItype __r1 __asm__ ("1") = (m0);                          \
  348                                                                         \
  349     __asm__ ("mr\t%%r0,%3"                                              \
  350              : "=r" (__r0), "=r" (__r1)                                 \
  351              : "r"  (__r1),  "r" (m1));                                 \
  352     (xh) = __r0; (xl) = __r1;                                           \
  353   } while (0)
  354
  355 #define sdiv_qrnnd(q, r, n1, n0, d) \
  356   do {                                                                  \
  357     register SItype __r0 __asm__ ("0") = (n1);                          \
  358     register SItype __r1 __asm__ ("1") = (n0);                          \
  359                                                                         \
  360     __asm__ ("dr\t%%r0,%4"                                              \
  361              : "=r" (__r0), "=r" (__r1)                                 \
  362              : "r" (__r0), "r" (__r1), "r" (d));                        \
  363     (q) = __r1; (r) = __r0;                                             \
  364   } while (0)
  365 #endif /* __zarch__ */
  366 #endif
 367
  368 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
      /* 32-bit x86.  The {l} and {a|b} groups in the templates are GCC's
         assembler-dialect alternatives, so one string serves both AT&T and
         Intel syntax.  Two-word add/subtract: each output is tied to the
         corresponding first-operand input through matching constraints ("0"
         and "1"), the low words are combined first and adc/sbb then combines
         the high words.  No "cc" clobber is listed in these statements.  */
  369 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  370   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
  371            : "=r" ((USItype) (sh)),                                     \
  372              "=&r" ((USItype) (sl))                                     \
  373            : "%0" ((USItype) (ah)),                                     \
  374              "g" ((USItype) (bh)),                                      \
  375              "%1" ((USItype) (al)),                                     \
  376              "g" ((USItype) (bl)))
  377 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  378   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
  379            : "=r" ((USItype) (sh)),                                     \
  380              "=&r" ((USItype) (sl))                                     \
  381            : "0" ((USItype) (ah)),                                      \
  382              "g" ((USItype) (bh)),                                      \
  383              "1" ((USItype) (al)),                                      \
  384              "g" ((USItype) (bl)))
      /* mul takes one factor from the "a" register (tied to output 0) and the
         other from a register or memory operand; the low product word comes
         back in "a" (w0) and the high word in "d" (w1).  */
  385 #define umul_ppmm(w1, w0, u, v) \
  386   __asm__ ("mul{l} %3"                                                  \
  387            : "=a" ((USItype) (w0)),                                     \
  388              "=d" ((USItype) (w1))                                      \
  389            : "%0" ((USItype) (u)),                                      \
  390              "rm" ((USItype) (v)))
      /* div takes the low numerator word in "a" and the high word in "d" (via
         the "0" and "1" matching constraints) and leaves the quotient in "a"
         and the remainder in "d".  */
  391 #define udiv_qrnnd(q, r, n1, n0, dv) \
  392   __asm__ ("div{l} %4"                                                  \
  393            : "=a" ((USItype) (q)),                                      \
  394              "=d" ((USItype) (r))                                       \
  395            : "0" ((USItype) (n0)),                                      \
  396              "1" ((USItype) (n1)),                                      \
  397              "rm" ((USItype) (dv)))
  398 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
  399 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
  400 #define UMUL_TIME 40
  401 #define UDIV_TIME 40
  402 #endif /* 80x86 */
 403
  404 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
      /* x86 with a 64-bit word: the same scheme as the 32-bit x86 block, using
         the q-suffixed instructions and UDItype operands.  The second operands
         of add/sub use "rme" (register, memory, or a constant accepted by the
         "e" constraint).  NOTE(review): the guard also admits __i386__ with a
         64-bit word, yet every instruction below is the 64-bit (q) form --
         confirm that combination is really intended.  */
  405 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  406   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
  407            : "=r" ((UDItype) (sh)),                                     \
  408              "=&r" ((UDItype) (sl))                                     \
  409            : "%0" ((UDItype) (ah)),                                     \
  410              "rme" ((UDItype) (bh)),                                    \
  411              "%1" ((UDItype) (al)),                                     \
  412              "rme" ((UDItype) (bl)))
  413 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  414   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
  415            : "=r" ((UDItype) (sh)),                                     \
  416              "=&r" ((UDItype) (sl))                                     \
  417            : "0" ((UDItype) (ah)),                                      \
  418              "rme" ((UDItype) (bh)),                                    \
  419              "1" ((UDItype) (al)),                                      \
  420              "rme" ((UDItype) (bl)))
      /* mul: low product word returned in "a" (w0), high word in "d" (w1).  */
  421 #define umul_ppmm(w1, w0, u, v) \
  422   __asm__ ("mul{q} %3"                                                  \
  423            : "=a" ((UDItype) (w0)),                                     \
  424              "=d" ((UDItype) (w1))                                      \
  425            : "%0" ((UDItype) (u)),                                      \
  426              "rm" ((UDItype) (v)))
      /* div: numerator n1:n0 passed in "d":"a"; quotient returned in "a" and
         remainder in "d".  */
  427 #define udiv_qrnnd(q, r, n1, n0, dv) \
  428   __asm__ ("div{q} %4"                                                  \
  429            : "=a" ((UDItype) (q)),                                      \
  430              "=d" ((UDItype) (r))                                       \
  431            : "0" ((UDItype) (n0)),                                      \
  432              "1" ((UDItype) (n1)),                                      \
  433              "rm" ((UDItype) (dv)))
 434 #define count_leading_zeros(count, x)   ((count) = __builtin_clzl (x))
 435 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzl (x))
  436 #define UMUL_TIME 40
      /* NOTE(review): UMUL_TIME and UDIV_TIME look like relative cost estimates
         for the multiply and divide primitives; nothing in view states the
         unit or who consumes them -- confirm.  */
  437 #define UDIV_TIME 40
  438 #endif /* x86_64 */
 439
  440 #if defined (__i960__) && W_TYPE_SIZE == 32
      /* i960, 32-bit word.  Both macros are GNU statement expressions built on
         a single emul, whose destination (operand 0) is a UDItype.  umul_ppmm
         reads that value back through a union whose struct declares the low
         word (__l) before the high word (__h); __umulsidi3 simply yields the
         UDItype product as the value of the expression.  */
  441 #define umul_ppmm(w1, w0, u, v) \
  442   ({union {UDItype __ll;                                                \
  443            struct {USItype __l, __h;} __i;                              \
  444           } __xx;                                                       \
  445   __asm__ ("emul        %2,%1,%0"                                       \
  446            : "=d" (__xx.__ll)                                           \
  447            : "%dI" ((USItype) (u)),                                     \
  448              "dI" ((USItype) (v)));                                     \
  449   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
  450 #define __umulsidi3(u, v) \
  451   ({UDItype __w;                                                        \
  452     __asm__ ("emul      %2,%1,%0"                                       \
  453              : "=d" (__w)                                               \
  454              : "%dI" ((USItype) (u)),                                   \
  455                "dI" ((USItype) (v)));                                   \
  456     __w; })
  457 #endif /* __i960__ */
 458
 459 #if defined (__ia64) && W_TYPE_SIZE == 64
 460 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
 461    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
 462    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
 463    register, which takes an extra cycle.  */
 464 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
 465   do {                                                                  \
 466     UWtype __x;                                                         \
 467     __x = (al) - (bl);                                                  \
 468     if ((al) < (bl))                                                    \
 469       (sh) = (ah) - (bh) - 1;                                           \
 470     else                                                                \
 471       (sh) = (ah) - (bh);                                               \
 472     (sl) = __x;                                                         \
 473   } while (0)
 474
  475 /* Do both product parts in assembly, since that gives better code with
  476    all gcc versions.  Some callers will just use the upper part, and in
  477    that situation we waste an instruction, but not any cycles.  */
      /* All four operands use the "f" constraint; "ph" is earlyclobber because
         xma.hu writes it before xma.l has read the inputs.  */
  478 #define umul_ppmm(ph, pl, m0, m1)                                       \
  479   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
  480            : "=&f" (ph), "=f" (pl)                                      \
  481            : "f" (m0), "f" (m1))
      /* NOTE(review): presumably "mux1 ... @rev" reverses the byte order of X
         and czx1.l then locates a zero byte, so that (_a - 1) * 8 is the bit
         offset of the most significant nonzero byte -- confirm against the
         IA-64 instruction reference.  After shifting that byte down, the two
         comparisons and the final ">> 1" narrow the remaining value by 4, 2
         and 1 bits, leaving in _c the index of the highest set bit; COUNT is
         W_TYPE_SIZE - 1 minus that index.  */
  482 #define count_leading_zeros(count, x)                                   \
  483   do {                                                                  \
  484     UWtype _x = (x), _y, _a, _c;                                        \
  485     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
  486     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
  487     _c = (_a - 1) << 3;                                                 \
  488     _x >>= _c;                                                          \
  489     if (_x >= 1 << 4)                                                   \
  490       _x >>= 4, _c += 4;                                                \
  491     if (_x >= 1 << 2)                                                   \
  492       _x >>= 2, _c += 2;                                                \
  493     _c += _x >> 1;                                                      \
  494     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  495   } while (0)
  496 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
  497    based, and we don't need a special case for x==0 here */
      /* (x - 1) & ~x has exactly the bits below the lowest set bit of X set, so
         counting them with popcnt gives the number of trailing zeros.  */
  498 #define count_trailing_zeros(count, x)                                  \
  499   do {                                                                  \
  500     UWtype __ctz_x = (x);                                               \
  501     __asm__ ("popcnt %0 = %1"                                           \
  502              : "=r" (count)                                             \
  503              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  504   } while (0)
  505 #define UMUL_TIME 14
  506 #endif
 507
  508 #if defined (__M32R__) && W_TYPE_SIZE == 32
      /* M32R, 32-bit word.  Two-word add/subtract done in place: each output is
         tied to the corresponding first-operand input ("0" and "1" matching
         constraints).  The leading cmp of a register with itself clears the
         condition bit, after which addx/subx is applied to the low words and
         then to the high words.  The condition bit is declared clobbered as
         "cbit".  */
  509 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  510   /* The cmp clears the condition bit.  */ \
  511   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
  512            : "=r" ((USItype) (sh)),                                     \
  513              "=&r" ((USItype) (sl))                                     \
  514            : "0" ((USItype) (ah)),                                      \
  515              "r" ((USItype) (bh)),                                      \
  516              "1" ((USItype) (al)),                                      \
  517              "r" ((USItype) (bl))                                       \
  518            : "cbit")
  519 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  520   /* The cmp clears the condition bit.  */ \
  521   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
  522            : "=r" ((USItype) (sh)),                                     \
  523              "=&r" ((USItype) (sl))                                     \
  524            : "0" ((USItype) (ah)),                                      \
  525              "r" ((USItype) (bh)),                                      \
  526              "1" ((USItype) (al)),                                      \
  527              "r" ((USItype) (bl))                                       \
  528            : "cbit")
  529 #endif /* __M32R__ */
 530
  531 #if defined (__mc68000__) && W_TYPE_SIZE == 32
      /* m68k, 32-bit word.  Two-word add/subtract done in place in data
         registers ("d"): each output is tied to its first-operand input, the
         low words are combined with add/sub and the high words with
         addx/subx.  The low-word second operand may be any general operand
         ("g"); the high-word second operand must be a data register.  */
  532 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  533   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
  534            : "=d" ((USItype) (sh)),                                     \
  535              "=&d" ((USItype) (sl))                                     \
  536            : "%0" ((USItype) (ah)),                                     \
  537              "d" ((USItype) (bh)),                                      \
  538              "%1" ((USItype) (al)),                                     \
  539              "g" ((USItype) (bl)))
  540 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  541   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
  542            : "=d" ((USItype) (sh)),                                     \
  543              "=&d" ((USItype) (sl))                                     \
  544            : "0" ((USItype) (ah)),                                      \
  545              "d" ((USItype) (bh)),                                      \
  546              "1" ((USItype) (al)),                                      \
  547              "g" ((USItype) (bl)))
 548
 549 /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  The '060 is excluded below because it does not implement these forms in hardware.  */
 550 #if (defined (__mc68020__) && !defined (__mc68060__))
 551 #define umul_ppmm(w1, w0, u, v) \
 552   __asm__ ("mulu%.l %3,%1:%0"                                           \
 553            : "=d" ((USItype) (w0)),                                     \
 554              "=d" ((USItype) (w1))                                      \
 555            : "%0" ((USItype) (u)),                                      \
 556              "dmi" ((USItype) (v)))
 557 #define UMUL_TIME 45
 558 #define udiv_qrnnd(q, r, n1, n0, d) \
 559   __asm__ ("divu%.l %4,%1:%0"                                           \
 560            : "=d" ((USItype) (q)),                                      \
 561              "=d" ((USItype) (r))                                       \
 562            : "0" ((USItype) (n0)),                                      \
 563              "1" ((USItype) (n1)),                                      \
 564              "dmi" ((USItype) (d)))
 565 #define UDIV_TIME 90
 566 #define sdiv_qrnnd(q, r, n1, n0, d) \
 567   __asm__ ("divs%.l %4,%1:%0"                                           \
 568            : "=d" ((USItype) (q)),                                      \
 569              "=d" ((USItype) (r))                                       \
 570            : "0" ((USItype) (n0)),                                      \
 571              "1" ((USItype) (n1)),                                      \
 572              "dmi" ((USItype) (d)))
 573
 574 #elif defined (__mcoldfire__) /* not mc68020 */
 575
 576 #define umul_ppmm(xh, xl, a, b) \
 577   __asm__ ("| Inlined umul_ppmm\n"                                      \
 578            "    move%.l %2,%/d0\n"                                      \
 579            "    move%.l %3,%/d1\n"                                      \
 580            "    move%.l %/d0,%/d2\n"                                    \
 581            "    swap    %/d0\n"                                         \
 582            "    move%.l %/d1,%/d3\n"                                    \
 583            "    swap    %/d1\n"                                         \
 584            "    move%.w %/d2,%/d4\n"                                    \
 585            "    mulu    %/d3,%/d4\n"                                    \
 586            "    mulu    %/d1,%/d2\n"                                    \
 587            "    mulu    %/d0,%/d3\n"                                    \
 588            "    mulu    %/d0,%/d1\n"                                    \
 589            "    move%.l %/d4,%/d0\n"                                    \
 590            "    clr%.w  %/d0\n"                                         \
 591            "    swap    %/d0\n"                                         \
 592            "    add%.l  %/d0,%/d2\n"                                    \
 593            "    add%.l  %/d3,%/d2\n"                                    \
 594            "    jcc     1f\n"                                           \
 595            "    add%.l  %#65536,%/d1\n"                                 \
 596            "1:  swap    %/d2\n"                                         \
 597            "    moveq   %#0,%/d0\n"                                     \
 598            "    move%.w %/d2,%/d0\n"                                    \
 599            "    move%.w %/d4,%/d2\n"                                    \
 600            "    move%.l %/d2,%1\n"                                      \
 601            "    add%.l  %/d1,%/d0\n"                                    \
 602            "    move%.l %/d0,%0"                                        \
 603            : "=g" ((USItype) (xh)),                                     \
 604              "=g" ((USItype) (xl))                                      \
 605            : "g" ((USItype) (a)),                                       \
 606              "g" ((USItype) (b))                                        \
 607            : "d0", "d1", "d2", "d3", "d4")
 608 #define UMUL_TIME 100
 609 #define UDIV_TIME 400
 610 #else /* not ColdFire */
 611 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
 612 #define umul_ppmm(xh, xl, a, b) \
 613   __asm__ ("| Inlined umul_ppmm\n"                                      \
 614            "    move%.l %2,%/d0\n"                                      \
 615            "    move%.l %3,%/d1\n"                                      \
 616            "    move%.l %/d0,%/d2\n"                                    \
 617            "    swap    %/d0\n"                                         \
 618            "    move%.l %/d1,%/d3\n"                                    \
 619            "    swap    %/d1\n"                                         \
 620            "    move%.w %/d2,%/d4\n"                                    \
 621            "    mulu    %/d3,%/d4\n"                                    \
 622            "    mulu    %/d1,%/d2\n"                                    \
 623            "    mulu    %/d0,%/d3\n"                                    \
 624            "    mulu    %/d0,%/d1\n"                                    \
 625            "    move%.l %/d4,%/d0\n"                                    \
 626            "    eor%.w  %/d0,%/d0\n"                                    \
 627            "    swap    %/d0\n"                                         \
 628            "    add%.l  %/d0,%/d2\n"                                    \
 629            "    add%.l  %/d3,%/d2\n"                                    \
 630            "    jcc     1f\n"                                           \
 631            "    add%.l  %#65536,%/d1\n"                                 \
 632            "1:  swap    %/d2\n"                                         \
 633            "    moveq   %#0,%/d0\n"                                     \
 634            "    move%.w %/d2,%/d0\n"                                    \
 635            "    move%.w %/d4,%/d2\n"                                    \
 636            "    move%.l %/d2,%1\n"                                      \
 637            "    add%.l  %/d1,%/d0\n"                                    \
 638            "    move%.l %/d0,%0"                                        \
 639            : "=g" ((USItype) (xh)),                                     \
 640              "=g" ((USItype) (xl))                                      \
 641            : "g" ((USItype) (a)),                                       \
 642              "g" ((USItype) (b))                                        \
 643            : "d0", "d1", "d2", "d3", "d4")
 644 #define UMUL_TIME 100
 645 #define UDIV_TIME 400
 646
 647 #endif /* not mc68020 */
 648
 649 /* The '020, '030, '040 and '060 have bitfield insns.
 650    cpu32 disguises as a 68020, but lacks them.  */
 651 #if defined (__mc68020__) && !defined (__mcpu32__)
 652 #define count_leading_zeros(count, x) \
 653   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 654            : "=d" ((USItype) (count))                                   \
 655            : "od" ((USItype) (x)), "n" (0))
 656 /* Some ColdFire architectures have a ff1 instruction supported via
 657    __builtin_clz. */
 658 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 659 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 660 #define COUNT_LEADING_ZEROS_0 32
 661 #endif
 662 #endif /* mc68000 */
 663
 664 #if defined (__m88000__) && W_TYPE_SIZE == 32
 665 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 666   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 667            : "=r" ((USItype) (sh)),                                     \
 668              "=&r" ((USItype) (sl))                                     \
 669            : "%rJ" ((USItype) (ah)),                                    \
 670              "rJ" ((USItype) (bh)),                                     \
 671              "%rJ" ((USItype) (al)),                                    \
 672              "rJ" ((USItype) (bl)))
 673 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 674   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 675            : "=r" ((USItype) (sh)),                                     \
 676              "=&r" ((USItype) (sl))                                     \
 677            : "rJ" ((USItype) (ah)),                                     \
 678              "rJ" ((USItype) (bh)),                                     \
 679              "rJ" ((USItype) (al)),                                     \
 680              "rJ" ((USItype) (bl)))
 681 #define count_leading_zeros(count, x) \
       /* Runs ff1 on x and stores the result XORed with 31 in count.       \
          NOTE(review): presumably ff1 yields the bit number of the most    \
          significant set bit (and 32 for a zero input, which would explain \
          the COUNT_LEADING_ZEROS_0 value of 63 that follows this macro) -- \
          confirm against the 88000 instruction set documentation.  */      \
 682   do {                                                                  \
 683     USItype __cbtmp;                                                    \
 684     __asm__ ("ff1 %0,%1"                                                \
 685              : "=r" (__cbtmp)                                           \
 686              : "r" ((USItype) (x)));                                    \
 687     (count) = __cbtmp ^ 31;                                             \
 688   } while (0)
 689 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 690 #if defined (__mc88110__)
 691 #define umul_ppmm(wh, wl, u, v) \
 692   do {                                                                  \
 693     union {UDItype __ll;                                                \
 694            struct {USItype __h, __l;} __i;                              \
 695           } __xx;                                                       \
 696     __asm__ ("mulu.d    %0,%1,%2"                                       \
 697              : "=r" (__xx.__ll)                                         \
 698              : "r" ((USItype) (u)),                                     \
 699                "r" ((USItype) (v)));                                    \
 700     (wh) = __xx.__i.__h;                                                \
 701     (wl) = __xx.__i.__l;                                                \
 702   } while (0)
 703 #define udiv_qrnnd(q, r, n1, n0, d) \
       /* Packs n1 (high) and n0 (low) into one double word through a       \
          union, lets divu.d produce the quotient __q, and then derives the \
          remainder in C as n0 - __q * d.  NOTE: n0 and d each appear twice \
          in the expansion, so argument expressions with side effects are   \
          evaluated more than once.  */                                     \
 704   ({union {UDItype __ll;                                                \
 705            struct {USItype __h, __l;} __i;                              \
 706           } __xx;                                                       \
 707   USItype __q;                                                          \
 708   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 709   __asm__ ("divu.d %0,%1,%2"                                            \
 710            : "=r" (__q)                                                 \
 711            : "r" (__xx.__ll),                                           \
 712              "r" ((USItype) (d)));                                      \
 713   (r) = (n0) - __q * (d); (q) = __q; })
 714 #define UMUL_TIME 5
 715 #define UDIV_TIME 25
 716 #else
 717 #define UMUL_TIME 17
 718 #define UDIV_TIME 150
 719 #endif /* __mc88110__ */
 720 #endif /* __m88000__ */
 721
 722 #if defined (__mn10300__)
     /* MN10300 family.  The inline assembly is spelled `__asm__', as in the
        rest of this file, so that the macros are still accepted when the
        including file is compiled with -ansi or a strict -std= option, where
        plain `asm' is not a keyword.
        The multiply macros come in two variants: with __AM33__ the mulu/mul
        instruction takes both result registers as operands; otherwise the low
        word is returned in the register tied to u and the high word in the
        register selected by the "z" constraint.  add_ssaaaa and sub_ddmmss
        are done in C on a DWunion.  */
 723 # if defined (__AM33__)
 724 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
 725 #  define umul_ppmm(w1, w0, u, v)               \
 726     __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 727 #  define smul_ppmm(w1, w0, u, v)               \
 728     __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 729 # else
 730 #  define umul_ppmm(w1, w0, u, v)               \
 731     __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 732 #  define smul_ppmm(w1, w0, u, v)               \
 733     __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 734 # endif
 735 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
 736   do {                                          \
 737     DWunion __s, __a, __b;                      \
 738     __a.s.low = (al); __a.s.high = (ah);        \
 739     __b.s.low = (bl); __b.s.high = (bh);        \
 740     __s.ll = __a.ll + __b.ll;                   \
 741     (sl) = __s.s.low; (sh) = __s.s.high;        \
 742   } while (0)
 743 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
 744   do {                                          \
 745     DWunion __s, __a, __b;                      \
 746     __a.s.low = (al); __a.s.high = (ah);        \
 747     __b.s.low = (bl); __b.s.high = (bh);        \
 748     __s.ll = __a.ll - __b.ll;                   \
 749     (sl) = __s.s.low; (sh) = __s.s.high;        \
 750   } while (0)
 751 # define udiv_qrnnd(q, r, nh, nl, d)            \
 752   __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 753 # define sdiv_qrnnd(q, r, nh, nl, d)            \
 754   __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 755 # define UMUL_TIME 3
 756 # define UDIV_TIME 38
 757 #endif
 758
 759 #if defined (__mips__) && W_TYPE_SIZE == 32
 760 #define umul_ppmm(w1, w0, u, v)                                         \
       /* Forms the product of u and v in plain C: both are cast to         \
          USItype, the multiplication is carried out in UDItype, and the    \
          result is split so that w1 receives the bits above bit 31 and w0  \
          the value truncated to USItype.  */                               \
 761   do {                                                                  \
 762     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
 763     (w1) = (USItype) (__x >> 32);                                       \
 764     (w0) = (USItype) (__x);                                             \
 765   } while (0)
 766 #define UMUL_TIME 10
 767 #define UDIV_TIME 100
 768
 769 #if (__mips == 32 || __mips == 64) && ! __mips16
 770 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 771 #define COUNT_LEADING_ZEROS_0 32
 772 #endif
 773 #endif /* __mips__ */
 774
 775 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 776 #define umul_ppmm(w1, w0, u, v) \
 777   ({union {UDItype __ll;                                                \
 778            struct {USItype __l, __h;} __i;                              \
 779           } __xx;                                                       \
 780   __asm__ ("meid %2,%0"                                                 \
 781            : "=g" (__xx.__ll)                                           \
 782            : "%0" ((USItype) (u)),                                      \
 783              "g" ((USItype) (v)));                                      \
 784   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 785 #define __umulsidi3(u, v) \
 786   ({UDItype __w;                                                        \
 787     __asm__ ("meid %2,%0"                                               \
 788              : "=g" (__w)                                               \
 789              : "%0" ((USItype) (u)),                                    \
 790                "g" ((USItype) (v)));                                    \
 791     __w; })
 792 #define udiv_qrnnd(q, r, n1, n0, d) \
 793   ({union {UDItype __ll;                                                \
 794            struct {USItype __l, __h;} __i;                              \
 795           } __xx;                                                       \
 796   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 797   __asm__ ("deid %2,%0"                                                 \
 798            : "=g" (__xx.__ll)                                           \
 799            : "0" (__xx.__ll),                                           \
 800              "g" ((USItype) (d)));                                      \
 801   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 802 #define count_trailing_zeros(count,x) \
 803   do {                                                                  \
 804     __asm__ ("ffsd     %2,%0"                                           \
 805             : "=r" ((USItype) (count))                                  \
 806             : "0" ((USItype) 0),                                        \
 807               "r" ((USItype) (x)));                                     \
 808   } while (0)
 809 #endif /* __ns32000__ */
 810
 811 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 812    system vendor compilers.
 813    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 814    enough, since that hits ARM and m68k too.  */
 815 #if (defined (_ARCH_PPC)        /* AIX */                               \
 816      || defined (_ARCH_PWR)     /* AIX */                               \
 817      || defined (_ARCH_COM)     /* AIX */                               \
 818      || defined (__powerpc__)   /* gcc */                               \
 819      || defined (__POWERPC__)   /* BEOS */                              \
 820      || defined (__ppc__)       /* Darwin */                            \
 821      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 822      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 823          && CPU_FAMILY == PPC)                                                \
 824      ) && W_TYPE_SIZE == 32
 825 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 826   do {                                                                  \
 827     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 828       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 829              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 830     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 831       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 832              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 833     else                                                                \
 834       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 835              : "=r" (sh), "=&r" (sl)                                    \
 836              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 837   } while (0)
 838 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 839   do {                                                                  \
 840     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 841       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 842                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 843     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 844       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 845                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 846     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 847       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 848                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 849     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 850       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 851                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 852     else                                                                \
 853       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 854                : "=r" (sh), "=&r" (sl)                                  \
 855                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 856   } while (0)
 857 #define count_leading_zeros(count, x) \
 858   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 859 #define COUNT_LEADING_ZEROS_0 32
 860 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 861   || defined (__ppc__)                                                    \
 862   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 863   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 864          && CPU_FAMILY == PPC)
 865 #define umul_ppmm(ph, pl, m0, m1) \
       /* (ph, pl) = m0 * m1, unsigned.  ph is produced by mulhwu and pl by  \
          an ordinary USItype multiplication.  Both read the local copies    \
          __m0 and __m1, so m0 and m1 are each evaluated exactly once even   \
          when they are expressions with side effects.  */                   \
 866   do {                                                                  \
 867     USItype __m0 = (m0), __m1 = (m1);                                   \
 868     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 869     (pl) = __m0 * __m1;                                                 \
 870   } while (0)
 871 #define UMUL_TIME 15
 872 #define smul_ppmm(ph, pl, m0, m1) \
       /* (ph, pl) = m0 * m1, signed.  ph is produced by mulhw and pl by an  \
          ordinary SItype multiplication.  Both read the local copies __m0   \
          and __m1, so m0 and m1 are each evaluated exactly once even when   \
          they are expressions with side effects.  */                        \
 873   do {                                                                  \
 874     SItype __m0 = (m0), __m1 = (m1);                                    \
 875     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 876     (pl) = __m0 * __m1;                                                 \
 877   } while (0)
 878 #define SMUL_TIME 14
 879 #define UDIV_TIME 120
 880 #elif defined (_ARCH_PWR)
 881 #define UMUL_TIME 8
 882 #define smul_ppmm(xh, xl, m0, m1) \
 883   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 884 #define SMUL_TIME 4
 885 #define sdiv_qrnnd(q, r, nh, nl, d) \
 886   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 887 #define UDIV_TIME 100
 888 #endif
 889 #endif /* 32-bit POWER architecture variants.  */
 890
 891 /* We should test _IBMR2 here when we add assembly support for the system
 892    vendor compilers.  */
 893 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 894 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 895   do {                                                                  \
 896     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 897       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 898              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 899     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 900       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 901              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 902     else                                                                \
 903       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 904              : "=r" (sh), "=&r" (sl)                                    \
 905              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 906   } while (0)
 907 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 908   do {                                                                  \
 909     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 910       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 911                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 912     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 913       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 914                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 915     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 916       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 917                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 918     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 919       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 920                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 921     else                                                                \
 922       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 923                : "=r" (sh), "=&r" (sl)                                  \
 924                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 925   } while (0)
 926 #define count_leading_zeros(count, x) \
 927   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 928 #define COUNT_LEADING_ZEROS_0 64
 929 #define umul_ppmm(ph, pl, m0, m1) \
       /* (ph, pl) = m0 * m1, unsigned.  ph is produced by mulhdu and pl by  \
          an ordinary UDItype multiplication.  Both read the local copies    \
          __m0 and __m1, so m0 and m1 are each evaluated exactly once even   \
          when they are expressions with side effects.  */                   \
 930   do {                                                                  \
 931     UDItype __m0 = (m0), __m1 = (m1);                                   \
 932     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 933     (pl) = __m0 * __m1;                                                 \
 934   } while (0)
 935 #define UMUL_TIME 15
 936 #define smul_ppmm(ph, pl, m0, m1) \
       /* (ph, pl) = m0 * m1, signed.  ph is produced by mulhd and pl by an  \
          ordinary DItype multiplication.  Both read the local copies __m0   \
          and __m1, so m0 and m1 are each evaluated exactly once even when   \
          they are expressions with side effects.  */                        \
 937   do {                                                                  \
 938     DItype __m0 = (m0), __m1 = (m1);                                    \
 939     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 940     (pl) = __m0 * __m1;                                                 \
 941   } while (0)
 942 #define SMUL_TIME 14  /* ??? */
 943 #define UDIV_TIME 120 /* ??? */
 944 #endif /* 64-bit PowerPC.  */
 945
 946 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 947 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 948   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 949            : "=r" ((USItype) (sh)),                                     \
 950              "=&r" ((USItype) (sl))                                     \
 951            : "%0" ((USItype) (ah)),                                     \
 952              "r" ((USItype) (bh)),                                      \
 953              "%1" ((USItype) (al)),                                     \
 954              "r" ((USItype) (bl)))
 955 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 956   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 957            : "=r" ((USItype) (sh)),                                     \
 958              "=&r" ((USItype) (sl))                                     \
 959            : "0" ((USItype) (ah)),                                      \
 960              "r" ((USItype) (bh)),                                      \
 961              "1" ((USItype) (al)),                                      \
 962              "r" ((USItype) (bl)))
 963 #define umul_ppmm(ph, pl, m0, m1) \
 964   do {                                                                  \
 965     USItype __m0 = (m0), __m1 = (m1);                                   \
 966     __asm__ (                                                           \
 967        "s       r2,r2\n"                                                \
 968 "       mts     r10,%2\n"                                               \
 969 "       m       r2,%3\n"                                                \
 970 "       m       r2,%3\n"                                                \
 971 "       m       r2,%3\n"                                                \
 972 "       m       r2,%3\n"                                                \
 973 "       m       r2,%3\n"                                                \
 974 "       m       r2,%3\n"                                                \
 975 "       m       r2,%3\n"                                                \
 976 "       m       r2,%3\n"                                                \
 977 "       m       r2,%3\n"                                                \
 978 "       m       r2,%3\n"                                                \
 979 "       m       r2,%3\n"                                                \
 980 "       m       r2,%3\n"                                                \
 981 "       m       r2,%3\n"                                                \
 982 "       m       r2,%3\n"                                                \
 983 "       m       r2,%3\n"                                                \
 984 "       m       r2,%3\n"                                                \
 985 "       cas     %0,r2,r0\n"                                             \
 986 "       mfs     r10,%1"                                                 \
 987              : "=r" ((USItype) (ph)),                                   \
 988                "=r" ((USItype) (pl))                                    \
 989              : "%r" (__m0),                                             \
 990                 "r" (__m1)                                              \
 991              : "r2");                                                   \
 992     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
 993              + (((SItype) __m1 >> 31) & __m0));                         \
 994   } while (0)
 995 #define UMUL_TIME 20
 996 #define UDIV_TIME 200
 997 #define count_leading_zeros(count, x) \
       /* Applies clz to one 16-bit half of x: the upper half (x >> 16)     \
          when x >= 0x10000, otherwise x itself with 16 added to the        \
          result.  NOTE(review): presumably clz only examines the low 16    \
          bits of its operand -- confirm against the ROMP documentation.    \
          x appears both in the test and in an asm operand, so it is        \
          evaluated twice.  */                                              \
 998   do {                                                                  \
 999     if ((x) >= 0x10000)                                                 \
1000       __asm__ ("clz     %0,%1"                                          \
1001                : "=r" ((USItype) (count))                               \
1002                : "r" ((USItype) (x) >> 16));                            \
1003     else                                                                \
1004       {                                                                 \
1005         __asm__ ("clz   %0,%1"                                          \
1006                  : "=r" ((USItype) (count))                             \
1007                  : "r" ((USItype) (x)));                                        \
1008         (count) += 16;                                                  \
1009       }                                                                 \
1010   } while (0)
1011 #endif
1012
1013 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1014 #ifndef __sh1__
1015 #define umul_ppmm(w1, w0, u, v) \
1016   __asm__ (                                                             \
1017        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1018            : "=r<" ((USItype)(w1)),                                     \
1019              "=r<" ((USItype)(w0))                                      \
1020            : "r" ((USItype)(u)),                                        \
1021              "r" ((USItype)(v))                                         \
1022            : "macl", "mach")
1023 #define UMUL_TIME 5
1024 #endif
1025
1026 /* This is the same algorithm as __udiv_qrnnd_c.  */
1027 #define UDIV_NEEDS_NORMALIZATION 1
1028
/* udiv_qrnnd (q, r, n1, n0, d), SH version.
   The asm copies D into r5, a word-swapped N0 into r4 and a word-swapped
   D into r6, then calls the hidden out-of-line helper __udiv_qrnnd_16
   twice through the pointer passed as operand 5.  The instruction written
   after each jsr is its delay-slot instruction.  The value the first call
   leaves in r1 is word-swapped into Q and the value from the second call
   is or-ed in.  N1 enters in the same register as R (operand 1, "z"
   constraint); r1, r2, r4, r5, r6, pr and the T bit are clobbered.  */
1029 #define udiv_qrnnd(q, r, n1, n0, d) \
1030   do {                                                                  \
1031     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1032                         __attribute__ ((visibility ("hidden")));        \
1033     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1034     __asm__ (                                                           \
1035         "mov%M4 %4,r5\n"                                                \
1036 "       swap.w %3,r4\n"                                                 \
1037 "       swap.w r5,r6\n"                                                 \
1038 "       jsr @%5\n"                                                      \
1039 "       shll16 r6\n"                                                    \
1040 "       swap.w r4,r4\n"                                                 \
1041 "       jsr @%5\n"                                                      \
1042 "       swap.w r1,%0\n"                                                 \
1043 "       or r1,%0"                                                       \
1044         : "=r" (q), "=&z" (r)                                           \
1045         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1046         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1047   } while (0)
1048
1049 #define UDIV_TIME 80
1050
/* sub_ddmmss (sh, sl, ah, al, bh, bl), SH version.
   clrt clears the T bit, then the first subc subtracts BL from the low
   word and the second subc subtracts BH from the high word, with the
   borrow passed through T.  AH and AL are tied to the output registers
   SH and SL; T is declared clobbered.
   NOTE(review): SL is written by the first subc before BH is read by the
   second, yet SL is not marked earlyclobber -- confirm whether "=&r" is
   needed here as in the SPARC variants.  */
1051 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1052   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1053            : "=r" (sh), "=r" (sl)                                       \
1054            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1055
1056 #endif /* __sh__ */
1057
1058 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* __umulsidi3 (u, v): widening unsigned multiply for SHmedia.
   Each argument is converted to USItype as a whole (hence the parentheses
   around U and V, so that an argument such as `a + b' is not split by the
   casts), the first is widened to UDItype, and the product is therefore
   computed in UDItype.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros (count, x), SHmedia version.
   X is converted to USItype and zero-extended into the UDItype x_, which
   is fed to the nsb instruction; COUNT is that instruction's result
   minus 31.  COUNT_LEADING_ZEROS_0 is set to 32 -- presumably the value
   this macro yields for X == 0; confirm against the nsb definition.  */
1060 #define count_leading_zeros(count, x) \
1061   do                                                                    \
1062     {                                                                   \
1063       UDItype x_ = (USItype)(x);                                        \
1064       SItype c_;                                                        \
1065                                                                         \
1066       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1067       (count) = c_ - 31;                                                \
1068     }                                                                   \
1069   while (0)
1070 #define COUNT_LEADING_ZEROS_0 32
1071 #endif
1072
1073 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1074     && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl), 32-bit SPARC version.
   addcc adds the low words AL + BL into SL and sets the carry; addx then
   adds AH + BH plus that carry into SH.  SL is an earlyclobber output
   because it is written before the high-word inputs are read.  The `%'
   in the AH and AL constraints marks each as commutative with the
   operand that follows it.  */
1075 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1076   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1077            : "=r" ((USItype) (sh)),                                     \
1078              "=&r" ((USItype) (sl))                                     \
1079            : "%rJ" ((USItype) (ah)),                                    \
1080              "rI" ((USItype) (bh)),                                     \
1081              "%rJ" ((USItype) (al)),                                    \
1082              "rI" ((USItype) (bl))                                      \
1083            __CLOBBER_CC)
/* sub_ddmmss (sh, sl, ah, al, bh, bl), 32-bit SPARC version.
   subcc computes AL - BL into SL and sets the borrow; subx then computes
   AH - BH minus that borrow into SH.  SL is an earlyclobber output
   because it is written before the high-word inputs are read.  */
1084 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1085   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1086            : "=r" ((USItype) (sh)),                                     \
1087              "=&r" ((USItype) (sl))                                     \
1088            : "rJ" ((USItype) (ah)),                                     \
1089              "rI" ((USItype) (bh)),                                     \
1090              "rJ" ((USItype) (al)),                                     \
1091              "rI" ((USItype) (bl))                                      \
1092            __CLOBBER_CC)
1093 #if defined (__sparc_v8__)
/* umul_ppmm (w1, w0, u, v), SPARC v8 version.
   umul writes its register result to W0; the following rd copies the
   %y register into W1.  */
1094 #define umul_ppmm(w1, w0, u, v) \
1095   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1096            : "=r" ((USItype) (w1)),                                     \
1097              "=r" ((USItype) (w0))                                      \
1098            : "r" ((USItype) (u)),                                       \
1099              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d), SPARC v8 version.
   N1 is moved into %y, followed by three nops before udiv divides by D
   with N0 as its register operand, producing Q.  The remainder is then
   rebuilt as N0 - Q * D using umul and sub.  Both outputs are
   earlyclobber because inputs are still read after they are written.  */
1100 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1101   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1102            : "=&r" ((USItype) (__q)),                                   \
1103              "=&r" ((USItype) (__r))                                    \
1104            : "r" ((USItype) (__n1)),                                    \
1105              "r" ((USItype) (__n0)),                                    \
1106              "r" ((USItype) (__d)))
1107 #else
1108 #if defined (__sparclite__)
1109 /* This has hardware multiply but not divide.  It also has two additional
1110    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (w1, w0, u, v), SPARClite version.
   Same sequence as the v8 definition: umul writes its register result to
   W0 and rd copies the %y register into W1.  */
1111 #define umul_ppmm(w1, w0, u, v) \
1112   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1113            : "=r" ((USItype) (w1)),                                     \
1114              "=r" ((USItype) (w0))                                      \
1115            : "r" ((USItype) (u)),                                       \
1116              "r" ((USItype) (v)))
/* udiv_qrnnd (q, r, n1, n0, d), SPARClite version.
   N1 is written to %y, `tst %g0' initialises the condition codes, and
   32 divscc steps follow: the first takes N0, the rest iterate on %g1,
   and the last one deposits the quotient in Q.  The remainder is read
   back from %y; if the final condition codes say "less", the annulled
   delay-slot add corrects it by adding D.

   Both outputs are earlyclobber ("=&r"): D (operand 4) is still read by
   the final `add %1,%4,%1' after Q and R have already been written, so
   neither output may be allocated to the register that holds D.

   %g1 and the condition codes are clobbered.  UDIV_TIME is the cost
   figure advertised for this macro.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
"       tst     %%g0\n"                                                 \
"       divscc  %3,%4,%%g1\n"                                           \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%0\n"                                           \
"       rd      %%y,%1\n"                                               \
"       bl,a 1f\n"                                                      \
"       add     %1,%4,%1\n"                                             \
"1:     ! End of inline udiv_qrnnd"                                     \
           : "=&r" ((USItype) (q)),                                     \
             "=&r" ((USItype) (r))                                      \
           : "r" ((USItype) (n1)),                                      \
             "r" ((USItype) (n0)),                                      \
             "rI" ((USItype) (d))                                       \
           : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
/* count_leading_zeros (count, x), SPARClite version.
   A single `scan %1,1,%0' instruction with X as the source register and
   the immediate 1 as its second operand; the result goes to COUNT.  */
1164 #define count_leading_zeros(count, x) \
1165   do {                                                                  \
1166   __asm__ ("scan %1,1,%0"                                               \
1167            : "=r" ((USItype) (count))                                   \
1168            : "r" ((USItype) (x)));                                      \
1169   } while (0)
1170 /* Early sparclites return 63 for an argument of 0, but they warn that future
1171    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1172    undefined.  */
1173 #else
1174 /* SPARC without integer multiplication and divide instructions.
1175    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (w1, w0, u, v), SPARC without a multiply instruction.
   U is written to %y and %g1 is zeroed by andcc (which also sets the
   condition codes); 32 mulscc steps with V follow, then one final
   mulscc with 0.  %o5 is prepared up front as
   (V arithmetically shifted right by 31) & U and is added to %g1 to
   form W1; W0 is read back from %y.  %g1, %o5 and the condition codes
   are clobbered.  UMUL_TIME is the cost figure for this sequence.  */
1176 #define umul_ppmm(w1, w0, u, v) \
1177   __asm__ ("! Inlined umul_ppmm\n"                                      \
1178 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1179 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1180 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1181 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1182 "       mulscc  %%g1,%3,%%g1\n"                                         \
1183 "       mulscc  %%g1,%3,%%g1\n"                                         \
1184 "       mulscc  %%g1,%3,%%g1\n"                                         \
1185 "       mulscc  %%g1,%3,%%g1\n"                                         \
1186 "       mulscc  %%g1,%3,%%g1\n"                                         \
1187 "       mulscc  %%g1,%3,%%g1\n"                                         \
1188 "       mulscc  %%g1,%3,%%g1\n"                                         \
1189 "       mulscc  %%g1,%3,%%g1\n"                                         \
1190 "       mulscc  %%g1,%3,%%g1\n"                                         \
1191 "       mulscc  %%g1,%3,%%g1\n"                                         \
1192 "       mulscc  %%g1,%3,%%g1\n"                                         \
1193 "       mulscc  %%g1,%3,%%g1\n"                                         \
1194 "       mulscc  %%g1,%3,%%g1\n"                                         \
1195 "       mulscc  %%g1,%3,%%g1\n"                                         \
1196 "       mulscc  %%g1,%3,%%g1\n"                                         \
1197 "       mulscc  %%g1,%3,%%g1\n"                                         \
1198 "       mulscc  %%g1,%3,%%g1\n"                                         \
1199 "       mulscc  %%g1,%3,%%g1\n"                                         \
1200 "       mulscc  %%g1,%3,%%g1\n"                                         \
1201 "       mulscc  %%g1,%3,%%g1\n"                                         \
1202 "       mulscc  %%g1,%3,%%g1\n"                                         \
1203 "       mulscc  %%g1,%3,%%g1\n"                                         \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,%3,%%g1\n"                                         \
1214 "       mulscc  %%g1,0,%%g1\n"                                          \
1215 "       add     %%g1,%%o5,%0\n"                                         \
1216 "       rd      %%y,%1"                                                 \
1217            : "=r" ((USItype) (w1)),                                     \
1218              "=r" ((USItype) (w0))                                      \
1219            : "%rI" ((USItype) (u)),                                     \
1220              "r" ((USItype) (v))                                                \
1221            : "g1", "o5" __AND_CLOBBER_CC)
1222 #define UMUL_TIME 39            /* 39 instructions */
1223 /* It's quite necessary to add this much assembler for the sparc.
1224    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (__q, __r, __n1, __n0, __d), SPARC without a divide
   instruction.
   Operand 0 enters holding N0 and leaves holding Q; operand 1 enters
   holding N1 and leaves holding R (see the "0" and "1" matching
   constraints).  %g1 is loaded with 32 and decremented once per pass;
   each pass compares/subtracts D against operand 1 and shifts a bit
   into operand 0 through the carry.  The closing `xnor %0,0,%0'
   complements the accumulated bits to give the quotient.  %g1 and the
   condition codes are clobbered.  Indented instructions after a branch
   sit in that branch's delay slot.  */
1225 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1226   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1227 "       mov     32,%%g1\n"                                              \
1228 "       subcc   %1,%2,%%g0\n"                                           \
1229 "1:     bcs     5f\n"                                                   \
1230 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1231 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1232 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1233 "       subcc   %%g1,1,%%g1\n"                                          \
1234 "2:     bne     1b\n"                                                   \
1235 "        subcc  %1,%2,%%g0\n"                                           \
1236 "       bcs     3f\n"                                                   \
1237 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1238 "       b       3f\n"                                                   \
1239 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1240 "4:     sub     %1,%2,%1\n"                                             \
1241 "5:     addxcc  %1,%1,%1\n"                                             \
1242 "       bcc     2b\n"                                                   \
1243 "        subcc  %%g1,1,%%g1\n"                                          \
1244 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1245 "       bne     4b\n"                                                   \
1246 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1247 "       sub     %1,%2,%1\n"                                             \
1248 "3:     xnor    %0,0,%0\n"                                              \
1249 "       ! End of inline udiv_qrnnd"                                     \
1250            : "=&r" ((USItype) (__q)),                                   \
1251              "=&r" ((USItype) (__r))                                    \
1252            : "r" ((USItype) (__d)),                                     \
1253              "1" ((USItype) (__n1)),                                    \
1254              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1255 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1256 #endif /* __sparclite__ */
1257 #endif /* __sparc_v8__ */
1258 #endif /* sparc32 */
1259
1260 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1261     && W_TYPE_SIZE == 64
/* add_ssaaaa (sh, sl, ah, al, bh, bl), 64-bit SPARC version.
   addcc adds the low words into SL and sets the condition codes; a plain
   add forms AH + BH in SH.  `bcs,a,pn %xcc' then branches on the 64-bit
   carry with the annul bit set, so the `add %0, 1, %0' in its delay slot
   only executes when the low-word addition carried.  */
1262 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1263   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1264            "add %r2,%3,%0\n\t"                                          \
1265            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1266            "add %0, 1, %0\n"                                            \
1267            "1:"                                                         \
1268            : "=r" ((UDItype)(sh)),                                      \
1269              "=&r" ((UDItype)(sl))                                      \
1270            : "%rJ" ((UDItype)(ah)),                                     \
1271              "rI" ((UDItype)(bh)),                                      \
1272              "%rJ" ((UDItype)(al)),                                     \
1273              "rI" ((UDItype)(bl))                                       \
1274            __CLOBBER_CC)
1275
/* sub_ddmmss (sh, sl, ah, al, bh, bl), 64-bit SPARC version.
   subcc computes AL - BL into SL and sets the condition codes; a plain
   sub forms AH - BH in SH.  `bcs,a,pn %xcc' then branches on the 64-bit
   borrow with the annul bit set, so the `sub %0, 1, %0' in its delay
   slot only executes when the low-word subtraction borrowed.  */
1276 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1277   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1278            "sub %r2,%3,%0\n\t"                                          \
1279            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1280            "sub %0, 1, %0\n\t"                                          \
1281            "1:"                                                         \
1282            : "=r" ((UDItype)(sh)),                                      \
1283              "=&r" ((UDItype)(sl))                                      \
1284            : "rJ" ((UDItype)(ah)),                                      \
1285              "rI" ((UDItype)(bh)),                                      \
1286              "rJ" ((UDItype)(al)),                                      \
1287              "rI" ((UDItype)(bl))                                       \
1288            __CLOBBER_CC)
1289
/* umul_ppmm (wh, wl, u, v), 64-bit SPARC version.
   The product is assembled from four mulx instructions operating on
   32-bit halves of U and V (extracted with srl ..,0 and srlx ..,32),
   using four scratch registers tmp1..tmp4; carries between the partial
   products are handled with addcc/movcc.  WH receives the final
   `add %5,%2,%0' and WL is operand 1.  Condition codes are clobbered.
   NOTE(review): the scratch variables are named tmp1..tmp4 without a
   reserved prefix, so a caller whose U or V expression mentions a
   variable of the same name would presumably be captured -- confirm.
   UMUL_TIME and UDIV_TIME are the cost figures used for this target.  */
1290 #define umul_ppmm(wh, wl, u, v)                                         \
1291   do {                                                                  \
1292           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1293           __asm__ __volatile__ (                                        \
1294                    "srl %7,0,%3\n\t"                                    \
1295                    "mulx %3,%6,%1\n\t"                                  \
1296                    "srlx %6,32,%2\n\t"                                  \
1297                    "mulx %2,%3,%4\n\t"                                  \
1298                    "sllx %4,32,%5\n\t"                                  \
1299                    "srl %6,0,%3\n\t"                                    \
1300                    "sub %1,%5,%5\n\t"                                   \
1301                    "srlx %5,32,%5\n\t"                                  \
1302                    "addcc %4,%5,%4\n\t"                                 \
1303                    "srlx %7,32,%5\n\t"                                  \
1304                    "mulx %3,%5,%3\n\t"                                  \
1305                    "mulx %2,%5,%5\n\t"                                  \
1306                    "sethi %%hi(0x80000000),%2\n\t"                      \
1307                    "addcc %4,%3,%4\n\t"                                 \
1308                    "srlx %4,32,%4\n\t"                                  \
1309                    "add %2,%2,%2\n\t"                                   \
1310                    "movcc %%xcc,%%g0,%2\n\t"                            \
1311                    "addcc %5,%4,%5\n\t"                                 \
1312                    "sllx %3,32,%3\n\t"                                  \
1313                    "add %1,%3,%1\n\t"                                   \
1314                    "add %5,%2,%0"                                       \
1315            : "=r" ((UDItype)(wh)),                                      \
1316              "=&r" ((UDItype)(wl)),                                     \
1317              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1318            : "r" ((UDItype)(u)),                                        \
1319              "r" ((UDItype)(v))                                         \
1320            __CLOBBER_CC);                                               \
1321   } while (0)
1322 #define UMUL_TIME 96
1323 #define UDIV_TIME 230
1324 #endif /* sparc64 */
1325
1326 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (sh, sl, ah, al, bh, bl), VAX version.
   AH and AL are tied to the outputs SH and SL (matching constraints
   "0"/"1"); addl2 adds BL into SL and adwc then adds BH into SH.  The
   "g" constraints accept any general operand.  */
1327 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1328   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1329            : "=g" ((USItype) (sh)),                                     \
1330              "=&g" ((USItype) (sl))                                     \
1331            : "%0" ((USItype) (ah)),                                     \
1332              "g" ((USItype) (bh)),                                      \
1333              "%1" ((USItype) (al)),                                     \
1334              "g" ((USItype) (bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl), VAX version.
   AH and AL are tied to the outputs SH and SL; subl2 subtracts BL from
   SL and sbwc then subtracts BH from SH.  */
1335 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1336   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1337            : "=g" ((USItype) (sh)),                                     \
1338              "=&g" ((USItype) (sl))                                     \
1339            : "0" ((USItype) (ah)),                                      \
1340              "g" ((USItype) (bh)),                                      \
1341              "1" ((USItype) (al)),                                      \
1342              "g" ((USItype) (bl)))
/* umul_ppmm (xh, xl, m0, m1), VAX version.
   M0 and M1 are copied once into USItype locals.  emul multiplies them
   (its third operand is the constant 0) into the 64-bit union member
   __xx.__ll, whose halves are then read back as __h (XH) and __l (XL).
   Finally XH is adjusted by adding M1 when M0 has its top bit set and
   M0 when M1 has its top bit set -- presumably because emul treats its
   operands as signed; confirm against the VAX architecture manual.  */
1343 #define umul_ppmm(xh, xl, m0, m1) \
1344   do {                                                                  \
1345     union {                                                             \
1346         UDItype __ll;                                                   \
1347         struct {USItype __l, __h;} __i;                                 \
1348       } __xx;                                                           \
1349     USItype __m0 = (m0), __m1 = (m1);                                   \
1350     __asm__ ("emul %1,%2,$0,%0"                                         \
1351              : "=r" (__xx.__ll)                                         \
1352              : "g" (__m0),                                              \
1353                "g" (__m1));                                             \
1354     (xh) = __xx.__i.__h;                                                \
1355     (xl) = __xx.__i.__l;                                                \
1356     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1357              + (((SItype) __m1 >> 31) & __m0));                         \
1358   } while (0)
/* sdiv_qrnnd (q, r, n1, n0, d), VAX version.
   N1 and N0 are stored into the __h and __l halves of a union that
   overlays a DItype; ediv then takes D and that 64-bit value as inputs
   and writes Q and R.  */
1359 #define sdiv_qrnnd(q, r, n1, n0, d) \
1360   do {                                                                  \
1361     union {DItype __ll;                                                 \
1362            struct {SItype __l, __h;} __i;                               \
1363           } __xx;                                                       \
1364     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1365     __asm__ ("ediv %3,%2,%0,%1"                                         \
1366              : "=g" (q), "=g" (r)                                       \
1367              : "g" (__xx.__ll), "g" (d));                               \
1368   } while (0)
1369 #endif /* __vax__ */
1370
1371 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1372 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1373    to expand builtin functions depending on what configuration features
1374    are available.  This avoids library calls when the operation can be
1375    performed in-line.  */
/* umul_ppmm (w1, w0, u, v), Xtensa version.
   The double-word product comes from __builtin_umulsidi3 and is split
   through a DWunion: the high half goes to W1, the low half to W0.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __prod;                                                     \
                                                                        \
    __prod.ll = __builtin_umulsidi3 (u, v);                             \
    (w1) = __prod.s.high;                                               \
    (w0) = __prod.s.low;                                                \
  } while (0)
/* The remaining Xtensa primitives forward directly to compiler builtins.
   NOTE(review): GCC documents __builtin_clz and __builtin_ctz as
   undefined for a zero argument, and no COUNT_LEADING_ZEROS_0 is defined
   next to these macros, so callers presumably must not pass 0 --
   confirm.  */
1383 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1384 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1385 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1386 #endif /* __xtensa__ */
1387
1388 #if defined xstormy16
1389 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros (count, x), xstormy16 version.
   X is examined in 16-bit slices from the most significant end; each
   slice is handed to __clzhi2 and its result is added to COUNT.  The
   scan stops at the first slice that is not entirely zero (result other
   than 16).  For X == 0 every slice contributes 16, hence
   COUNT_LEADING_ZEROS_0 is W_TYPE_SIZE.

   The macro-local variables carry a reserved `__clz_' prefix so that
   caller expressions for COUNT or X that use names such as `c' or
   `size' are not captured by the locals declared here.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_c;                                              \
                                                                        \
          __clz_c = __clzhi2 ((x) >> (__clz_size - 16));                \
          (count) += __clz_c;                                           \
          if (__clz_c != 16)                                            \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1408 #endif
1409
1410 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (sh, sl, ah, al, bh, bl), Z8000 version.
   AH and AL are tied to the outputs SH and SL; `add' adds BL into SL and
   `adc' then adds BH into SH.  All operands are accessed as
   unsigned int.  */
1411 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1412   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1413            : "=r" ((unsigned int)(sh)),                                 \
1414              "=&r" ((unsigned int)(sl))                                 \
1415            : "%0" ((unsigned int)(ah)),                                 \
1416              "r" ((unsigned int)(bh)),                                  \
1417              "%1" ((unsigned int)(al)),                                 \
1418              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (sh, sl, ah, al, bh, bl), Z8000 version.
   AH and AL are tied to the outputs SH and SL; `sub' subtracts BL from
   SL and `sbc' then subtracts BH from SH.  All operands are accessed as
   unsigned int.  */
1419 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1420   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1421            : "=r" ((unsigned int)(sh)),                                 \
1422              "=&r" ((unsigned int)(sl))                                 \
1423            : "0" ((unsigned int)(ah)),                                  \
1424              "r" ((unsigned int)(bh)),                                  \
1425              "1" ((unsigned int)(al)),                                  \
1426              "rQR" ((unsigned int)(bl)))
/* umul_ppmm (xh, xl, m0, m1), Z8000 version.
   M0 and M1 are copied once into unsigned int locals.  `mult' produces
   the two result halves in the __h/__l members of a union that overlays
   a long int; M0 is tied to the __l output register.  XH is then
   adjusted by adding M1 when bit 15 of M0 is set and M0 when bit 15 of
   M1 is set -- presumably because `mult' treats its operands as signed;
   confirm against the Z8000 manual.  */
1427 #define umul_ppmm(xh, xl, m0, m1) \
1428   do {                                                                  \
1429     union {long int __ll;                                               \
1430            struct {unsigned int __h, __l;} __i;                         \
1431           } __xx;                                                       \
1432     unsigned int __m0 = (m0), __m1 = (m1);                              \
1433     __asm__ ("mult      %S0,%H3"                                        \
1434              : "=r" (__xx.__i.__h),                                     \
1435                "=r" (__xx.__i.__l)                                      \
1436              : "%1" (__m0),                                             \
1437                "rQR" (__m1));                                           \
1438     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1439     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1440              + (((signed int) __m1 >> 15) & __m0));                     \
1441   } while (0)
1442 #endif /* __z8000__ */
1443
1444 #endif /* __GNUC__ */
1445
1446 /* If this machine has no inline assembler, use C macros.  */
1447
1448 #if !defined (add_ssaaaa)
/* add_ssaaaa (sh, sl, ah, al, bh, bl), portable C fallback.
   Forms the two-word sum (AH,AL) + (BH,BL) in (SH,SL).  The low sum is
   computed in a UWtype temporary; if it wrapped it is smaller than AL,
   and that comparison result is the carry added into the high word.
   AL is evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __lsum = (al) + (bl);                                        \
                                                                        \
    (sh) = (ah) + (bh) + ((al) > __lsum);                               \
    (sl) = __lsum;                                                      \
  } while (0)
1456 #endif
1457
1458 #if !defined (sub_ddmmss)
/* sub_ddmmss (sh, sl, ah, al, bh, bl), portable C fallback.
   Forms the two-word difference (AH,AL) - (BH,BL) in (SH,SL).  The low
   difference is computed in a UWtype temporary; if it wrapped it is
   larger than AL, and that comparison result is the borrow taken from
   the high word.  AL is evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __ldiff = (al) - (bl);                                       \
                                                                        \
    (sh) = (ah) - (bh) - ((al) < __ldiff);                              \
    (sl) = __ldiff;                                                     \
  } while (0)
1466 #endif
1467
1468 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1469    smul_ppmm.  */
1470 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* umul_ppmm (w1, w0, u, v) built on top of smul_ppmm.
   U and V are copied once into UWtype locals and multiplied with
   smul_ppmm, which stores the low word straight into W0.  The high word
   is then adjusted: -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask when
   the top bit of x is set and zero otherwise, so V is added when U has
   its top bit set and U is added when V has its top bit set.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __su = (u);                                                  \
    UWtype __sv = (v);                                                  \
    UWtype __shi;                                                       \
    UWtype __fix_u, __fix_v;                                            \
                                                                        \
    smul_ppmm (__shi, w0, __su, __sv);                                  \
    __fix_u = -(__su >> (W_TYPE_SIZE - 1)) & __sv;                      \
    __fix_v = -(__sv >> (W_TYPE_SIZE - 1)) & __su;                      \
    (w1) = __shi + __fix_u + __fix_v;                                   \
  } while (0)
1479 #endif
1480
1481 /* If we still don't have umul_ppmm, define it using plain C.  */
1482 #if !defined (umul_ppmm)
/* umul_ppmm (w1, w0, u, v), plain C fallback.
   U and V are each split into a low and a high half-word with
   __ll_lowpart / __ll_highpart, the four half-word products are formed
   in UWtype, and the two cross products are folded together with carry
   tracking to give the high word W1 and the low word W0.  U and V are
   each evaluated twice.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __u_lo = __ll_lowpart (u);                                  \
    UHWtype __u_hi = __ll_highpart (u);                                 \
    UHWtype __v_lo = __ll_lowpart (v);                                  \
    UHWtype __v_hi = __ll_highpart (v);                                 \
                                                                        \
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;                           \
    UWtype __p_mid = (UWtype) __u_lo * __v_hi;                          \
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;                           \
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;                           \
                                                                        \
    /* Adding the high half of the low product cannot overflow.  */     \
    __p_mid += __ll_highpart (__p_ll);                                  \
    /* Adding the other cross product can; a wrapped sum is smaller     \
       than the addend, and the carry is worth __ll_B in the high       \
       product.  */                                                     \
    __p_mid += __p_hl;                                                  \
    __p_hh += (__p_mid < __p_hl) ? __ll_B : 0;                          \
                                                                        \
    (w1) = __p_hh + __ll_highpart (__p_mid);                            \
    (w0) = __ll_lowpart (__p_mid) * __ll_B + __ll_lowpart (__p_ll);     \
  } while (0)
1506 #endif
1507
1508 #if !defined (__umulsidi3)
/* __umulsidi3 (u, v), generic version.
   A statement expression that lets umul_ppmm fill the high and low
   halves of a DWunion and yields the union's double-word member.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __prod;                                                     \
    umul_ppmm (__prod.s.high, __prod.s.low, u, v);                      \
    __prod.ll;                                                          \
  })
1513 #endif
1514
1515 /* Define this unconditionally, so it can be used for debugging.

        __udiv_qrnnd_c (q, r, n1, n0, d) is written in plain C.  It splits D
        with __ll_highpart and __ll_lowpart, forms two quotient digits
        (__q1 from N1, then __q0 from the running remainder), correcting
        each estimate downwards at most twice, and finally stores
        __q1 * __ll_B | __q0 in Q and the last remainder in R.

        N1, N0 and D are expanded more than once, so they should be free
        of side effects.  D's high part is used as a divisor and must not
        be zero.  NOTE(review): the usual contract for this routine is
        N1 < D with D's most significant bit set; nothing here checks
        that -- confirm against the documentation at the top of the file.  */
1516 #define __udiv_qrnnd_c(q, r, n1, n0, d) \
1517   do {                                                                  \
1518     UWtype __d1, __d0, __q1, __q0;                                      \
1519     UWtype __r1, __r0, __m;                                             \
1520     __d1 = __ll_highpart (d);   /* high part of the divisor */          \
1521     __d0 = __ll_lowpart (d);    /* low part of the divisor */           \
1522                                                                         \
1523     __r1 = (n1) % __d1;                                                 \
1524     __q1 = (n1) / __d1;         /* upper quotient digit estimate */     \
1525     __m = (UWtype) __q1 * __d0;                                         \
1526     __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
1527     if (__r1 < __m)             /* estimate too large: correct it */    \
1528       {                                                                 \
1529         __q1--, __r1 += (d);                                            \
1530         if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1531           if (__r1 < __m)                                               \
1532             __q1--, __r1 += (d);                                        \
1533       }                                                                 \
1534     __r1 -= __m;                                                        \
1535                                                                         \
1536     __r0 = __r1 % __d1;                                                 \
1537     __q0 = __r1 / __d1;         /* lower quotient digit estimate */     \
1538     __m = (UWtype) __q0 * __d0;                                         \
1539     __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
1540     if (__r0 < __m)                                                     \
1541       {                                                                 \
1542         __q0--, __r0 += (d);                                            \
1543         if (__r0 >= (d))                                                \
1544           if (__r0 < __m)                                               \
1545             __q0--, __r0 += (d);                                        \
1546       }                                                                 \
1547     __r0 -= __m;                                                        \
1548                                                                         \
1549     (q) = (UWtype) __q1 * __ll_B | __q0;  /* join the two digits */     \
1550     (r) = __r0;                                                         \
1551   } while (0)
1552
1553 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1554    __udiv_w_sdiv (defined in libgcc or elsewhere).

        udiv_qrnnd (q, r, nh, nl, d) stores the value returned by
        __udiv_w_sdiv in Q, and the word that call writes through its
        first argument in R.  The temporary uses the word type UWtype,
        like the other temporaries in this file, so the pointer handed
        to __udiv_w_sdiv has the operand width on every configuration
        rather than being fixed at 32 bits.
        NOTE(review): assumes __udiv_w_sdiv takes a `UWtype *' for the
        remainder, as the libgcc implementation does -- confirm for any
        out-of-tree definition.  */
1555 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1556 #define udiv_qrnnd(q, r, nh, nl, d) \
1557   do {                                                                  \
1558     UWtype __r;                                                         \
1559     (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
1560     (r) = __r;                                                          \
1561   } while (0)
1562 #endif
1563
1564 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
        UDIV_NEEDS_NORMALIZATION is defined as 1 together with this
        fallback; presumably this tells callers that the divisor passed
        to udiv_qrnnd must have its most significant bit set -- confirm
        against the documentation at the top of the file.  */
1565 #if !defined (udiv_qrnnd)
1566 #define UDIV_NEEDS_NORMALIZATION 1
1567 #define udiv_qrnnd __udiv_qrnnd_c
1568 #endif
1569
1570 #if !defined (count_leading_zeros)
     /* Generic C count_leading_zeros (count, x), used only when no other
        definition exists.  X is copied into a UWtype once.  A shift
        amount __a is then chosen: for W_TYPE_SIZE <= 32 by comparing
        against 1 << __BITS4, 1 << 2*__BITS4 and 1 << 3*__BITS4; otherwise
        by stepping down from W_TYPE_SIZE - 8 in units of 8 until the
        byte at that position is nonzero or __a reaches 0.  COUNT receives
        W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a).

        __BITS4 and __clz_tab are defined outside this section.
        COUNT_LEADING_ZEROS_0 is defined as W_TYPE_SIZE alongside this
        version; presumably that is the value stored for X == 0, which
        depends on __clz_tab[0] -- confirm.  */
1571 #define count_leading_zeros(count, x) \
1572   do {                                                                  \
1573     UWtype __xr = (x);                                                  \
1574     UWtype __a;                                                         \
1575                                                                         \
1576     if (W_TYPE_SIZE <= 32)                                              \
1577       {                                                                 \
1578         __a = __xr < ((UWtype)1<<2*__BITS4)                             \
1579           ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
1580           : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
1581       }                                                                 \
1582     else                                                                \
1583       {                                                                 \
1584         for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
1585           if (((__xr >> __a) & 0xff) != 0)                              \
1586             break;                                                      \
1587       }                                                                 \
1588                                                                         \
1589     (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
1590   } while (0)
1591 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1592 #endif
1593
1594 #if !defined (count_trailing_zeros)
1595 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1596    defined in asm, but if it is not, the C version above is good enough.

        X is copied into an unsigned word once; `__ctz_x & -__ctz_x' keeps
        only its lowest set bit.  COUNT receives W_TYPE_SIZE - 1 minus the
        leading-zero count of that single-bit value.  The temporaries are
        named __ctz_* so they do not clash with the locals of the nested
        count_leading_zeros expansion.
        NOTE(review): for X == 0 the masked value is 0 and the result
        depends on what count_leading_zeros yields for 0; it does not
        look meaningful -- confirm callers never pass 0.  */
1597 #define count_trailing_zeros(count, x) \
1598   do {                                                                  \
1599     UWtype __ctz_x = (x);                                               \
1600     UWtype __ctz_c;                                                     \
1601     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
1602     (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
1603   } while (0)
1604 #endif
1605
1606 #ifndef UDIV_NEEDS_NORMALIZATION
     /* Default to 0 when nothing has defined it by this point; the
        __udiv_qrnnd_c fallback earlier in this file defines it as 1.  */
1607 #define UDIV_NEEDS_NORMALIZATION 0
1608 #endif