gcc/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
  36    UDWtype -- An unsigned type, at least twice as large as UWtype.
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
     /* Word-splitting helpers.  W_TYPE_SIZE is the width of UWtype in bits:
        __BITS4 is a quarter of that width, __ll_B is 2**(W_TYPE_SIZE/2) as a
        UWtype, and __ll_lowpart/__ll_highpart yield the low and high halves
        of T after converting it to UWtype.  Macros expand lazily, so the
        default W_TYPE_SIZE supplied just below is still picked up.  */
  46 #define __BITS4 (W_TYPE_SIZE / 4)
  47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
  48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
  49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  50
  51 #ifndef W_TYPE_SIZE
     /* Fallback when the includer configured nothing: a 32-bit word with a
        64-bit double word.  UHWtype is also USItype here, which satisfies
        the "at least half the size of UWtype" requirement.  */
  52 #define W_TYPE_SIZE     32
  53 #define UWtype          USItype
  54 #define UHWtype         USItype
  55 #define UDWtype         UDItype
  56 #endif
  57
  58 /* Used in glibc only.  */
  59 #ifndef attribute_hidden
     /* Expand to nothing when the includer did not supply a definition.  */
  60 #define attribute_hidden
  61 #endif
  62
     /* 256-entry byte table defined elsewhere and indexed by the table-driven
        count_leading_zeros variants (e.g. the Alpha one below).  Its contents
        are not visible here -- presumably __clz_tab[v] is the bit length of
        V; confirm against the definition.  */
  63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If, in addition, the most
  79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  80    UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
  94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
  95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
  96    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  98    (i.e. carry out) is not stored anywhere, and is lost.
  99
 100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 104    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
 118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 119    understood by gcc1.  Use cpp to avoid major code duplication.  */
 120 #if __GNUC__ < 2
     /* A "cc" clobber would not be understood here, so both helpers expand
        to nothing.  */
 121 #define __CLOBBER_CC
 122 #define __AND_CLOBBER_CC
 123 #else /* __GNUC__ >= 2 */
     /* __CLOBBER_CC opens a clobber list holding only "cc"; __AND_CLOBBER_CC
        appends "cc" to a clobber list the asm statement already has.  */
 124 #define __CLOBBER_CC : "cc"
 125 #define __AND_CLOBBER_CC , "cc"
 126 #endif /* __GNUC__ < 2 */
 127
 128 #if defined (__alpha) && W_TYPE_SIZE == 64
     /* Alpha, 64-bit words.  umul_ppmm takes the high product word from
        __builtin_alpha_umulh and the low word from an ordinary (truncating)
        multiplication; the operands are copied into locals first so each
        argument is evaluated only once.  */
 129 #define umul_ppmm(ph, pl, m0, m1) \
 130   do {                                                                  \
 131     UDItype __m0 = (m0), __m1 = (m1);                                   \
 132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
 133     (pl) = __m0 * __m1;                                                 \
 134   } while (0)
 135 #define UMUL_TIME 46
 136 #ifndef LONGLONG_STANDALONE
     /* Division is delegated to the out-of-line __udiv_qrnnd declared below:
        its return value becomes the quotient and the remainder comes back
        through the pointer passed as its first argument.  Not available
        when LONGLONG_STANDALONE is defined.  */
 137 #define udiv_qrnnd(q, r, n1, n0, d) \
 138   do { UDItype __r;                                                     \
 139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
 140     (r) = __r;                                                          \
 141   } while (0)
 142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
 143 #define UDIV_TIME 220
 144 #endif /* LONGLONG_STANDALONE */
 145 #ifdef __alpha_cix__
     /* When __alpha_cix__ is defined the GCC builtins are used directly, and
        COUNT_LEADING_ZEROS_0 advertises 64 as the result for X == 0.  */
 146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
 147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
 148 #define COUNT_LEADING_ZEROS_0 64
 149 #else
     /* Otherwise work a byte at a time.  NOTE(review): the reading below
        assumes __builtin_alpha_cmpbge (0, x) yields one bit per zero byte of
        X and that __clz_tab[v] is the bit length of V -- neither is visible
        in this file, confirm before relying on it.  Under that reading __a
        is the index of the most significant nonzero byte and __t is that
        byte, so the count is 64 minus the position of its top bit.  */
 150 #define count_leading_zeros(COUNT,X) \
 151   do {                                                                  \
 152     UDItype __xr = (X), __t, __a;                                       \
 153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
 155     __t = __builtin_alpha_extbl (__xr, __a);                            \
 156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
 157   } while (0)
     /* Same byte-mask idea from the other end: isolate the lowest set bit of
        the inverted mask (x & -x), turn that one-hot value into a byte index
        with the 0xCC / 0xF0 / 0xAA tests, extract that byte, isolate its
        lowest set bit and convert it to a bit index the same way.  The
        result is byte index * 8 + bit index.  */
 158 #define count_trailing_zeros(COUNT,X) \
 159   do {                                                                  \
 160     UDItype __xr = (X), __t, __a;                                       \
 161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
 162     __t = ~__t & -~__t;                                                 \
 163     __a = ((__t & 0xCC) != 0) * 2;                                      \
 164     __a += ((__t & 0xF0) != 0) * 4;                                     \
 165     __a += ((__t & 0xAA) != 0);                                         \
 166     __t = __builtin_alpha_extbl (__xr, __a);                            \
 167     __a <<= 3;                                                          \
 168     __t &= -__t;                                                        \
 169     __a += ((__t & 0xCC) != 0) * 2;                                     \
 170     __a += ((__t & 0xF0) != 0) * 4;                                     \
 171     __a += ((__t & 0xAA) != 0);                                         \
 172     (COUNT) = __a;                                                      \
 173   } while (0)
 174 #endif /* __alpha_cix__ */
 175 #endif /* __alpha */
 176
 177 #if defined (__arc__) && W_TYPE_SIZE == 32
     /* ARC, 32-bit words.  Two-word add and subtract: the first instruction
        combines the low words (operands 4 and 5) into SL, the second
        combines the high words (operands 2 and 3) into SH.  SL is an
        earlyclobber output ("=&r") because it is written before the
        high-word inputs are read.  NOTE(review): the carry/borrow hand-off
        presumably relies on add.f/sub.f setting the flag that adc/sbc
        consume -- inferred from the mnemonics only.  */
 178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 180            : "=r" ((USItype) (sh)),                                     \
 181              "=&r" ((USItype) (sl))                                     \
 182            : "%r" ((USItype) (ah)),                                     \
 183              "rIJ" ((USItype) (bh)),                                    \
 184              "%r" ((USItype) (al)),                                     \
 185              "rIJ" ((USItype) (bl)))
 186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 188            : "=r" ((USItype) (sh)),                                     \
 189              "=&r" ((USItype) (sl))                                     \
 190            : "r" ((USItype) (ah)),                                      \
 191              "rIJ" ((USItype) (bh)),                                    \
 192              "r" ((USItype) (al)),                                      \
 193              "rIJ" ((USItype) (bl)))
 194 /* umul_ppmm (w1, w0, u, v): call the libgcc routine __umulsidi3 for the
        double-word product of U and V, then store its high word in W1 and
        its low word in W0 through the DWunion halves.  The macro arguments
        are parenthesized, like the other umul_ppmm variants in this file,
        so that any argument expression is substituted as a unit.  */
 195 #define umul_ppmm(w1, w0, u, v) \
 196 do {                                                                    \
 197   DWunion __w;                                                          \
 198   __w.ll = __umulsidi3 ((u), (v));                                      \
 199   (w1) = __w.s.high;                                                    \
 200   (w0) = __w.s.low;                                                     \
 201 } while (0)
 202 #define __umulsidi3 __umulsidi3
 203 UDItype __umulsidi3 (USItype, USItype);
 204 #endif
 205
 206 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
     /* ARM (not Thumb), 32-bit words.  Two-word add and subtract: adds/subs
        combine the low words (operands 4 and 5) into SL, then adc/sbc
        combine the high words (operands 2 and 3) into SH.  SL is an
        earlyclobber output because it is written before the high-word
        inputs are read.  __CLOBBER_CC declares the condition codes
        clobbered when the compiler understands that (gcc >= 2).  */
 207 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 208   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 209            : "=r" ((USItype) (sh)),                                     \
 210              "=&r" ((USItype) (sl))                                     \
 211            : "%r" ((USItype) (ah)),                                     \
 212              "rI" ((USItype) (bh)),                                     \
 213              "%r" ((USItype) (al)),                                     \
 214              "rI" ((USItype) (bl)) __CLOBBER_CC)
 215 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 216   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 217            : "=r" ((USItype) (sh)),                                     \
 218              "=&r" ((USItype) (sl))                                     \
 219            : "r" ((USItype) (ah)),                                      \
 220              "rI" ((USItype) (bh)),                                     \
 221              "r" ((USItype) (al)),                                      \
 222              "rI" ((USItype) (bl)) __CLOBBER_CC)
 223 #define umul_ppmm(xh, xl, a, b) \
 224 {register USItype __t0, __t1, __t2;                                     \
 225   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 226            "    mov     %2, %5, lsr #16\n"                              \
 227            "    mov     %0, %6, lsr #16\n"                              \
 228            "    bic     %3, %5, %2, lsl #16\n"                          \
 229            "    bic     %4, %6, %0, lsl #16\n"                          \
 230            "    mul     %1, %3, %4\n"                                   \
 231            "    mul     %4, %2, %4\n"                                   \
 232            "    mul     %3, %0, %3\n"                                   \
 233            "    mul     %0, %2, %0\n"                                   \
 234            "    adds    %3, %4, %3\n"                                   \
 235            "    addcs   %0, %0, #65536\n"                               \
 236            "    adds    %1, %1, %3, lsl #16\n"                          \
 237            "    adc     %0, %0, %3, lsr #16"                            \
 238            : "=&r" ((USItype) (xh)),                                    \
 239              "=r" ((USItype) (xl)),                                     \
 240              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 241            : "r" ((USItype) (a)),                                       \
 242              "r" ((USItype) (b)) __CLOBBER_CC );}
 243 #define UMUL_TIME 20
 244 #define UDIV_TIME 100
 245 #endif /* __arm__ */
 246
 247 #if defined(__arm__)
 248 /* Let gcc decide how best to implement count_leading_zeros.  */
     /* This applies to every __arm__ target: unlike the block above there
        is no !__thumb__ or W_TYPE_SIZE test.  COUNT_LEADING_ZEROS_0
        advertises 32 as the result for X == 0.  NOTE(review): GCC documents
        __builtin_clz (0) as undefined, so this presumably leans on the
        behaviour of the target's CLZ instruction -- confirm.  */
 249 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
 250 #define COUNT_LEADING_ZEROS_0 32
 251 #endif
 252
 253 #if defined (__CRIS__) && __CRIS_arch_version >= 3
     /* CRIS: defer to the GCC builtins.  count_leading_zeros is provided
        from arch version 3 on, count_trailing_zeros only from version 8.  */
 254 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
 255 #if __CRIS_arch_version >= 8
 256 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
 257 #endif
 258 #endif /* __CRIS__ */
 259
 260 #if defined (__hppa) && W_TYPE_SIZE == 32
     /* HPPA, 32-bit words.  Two-word add and subtract: add/sub combine the
        low words (operands 4 and 5) into SL, then addc/subb combine the
        high words (operands 2 and 3) into SH.  SL is an earlyclobber output
        because it is written before the high-word inputs are read.  */
 261 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 262   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
 263            : "=r" ((USItype) (sh)),                                     \
 264              "=&r" ((USItype) (sl))                                     \
 265            : "%rM" ((USItype) (ah)),                                    \
 266              "rM" ((USItype) (bh)),                                     \
 267              "%rM" ((USItype) (al)),                                    \
 268              "rM" ((USItype) (bl)))
 269 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 270   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
 271            : "=r" ((USItype) (sh)),                                     \
 272              "=&r" ((USItype) (sl))                                     \
 273            : "rM" ((USItype) (ah)),                                     \
 274              "rM" ((USItype) (bh)),                                     \
 275              "rM" ((USItype) (al)),                                     \
 276              "rM" ((USItype) (bl)))
 277 #if defined (_PA_RISC1_1)
     /* With _PA_RISC1_1, xmpyu produces the whole 64-bit product into __t.__f
        in one instruction; the two words are then read back through the
        union, __w1 (first member) as the high word and __w0 as the low
        word.  Without _PA_RISC1_1 no umul_ppmm is defined here.  */
 278 #define umul_ppmm(w1, w0, u, v) \
 279   do {                                                                  \
 280     union                                                               \
 281       {                                                                 \
 282         UDItype __f;                                                    \
 283         struct {USItype __w1, __w0;} __w1w0;                            \
 284       } __t;                                                            \
 285     __asm__ ("xmpyu %1,%2,%0"                                           \
 286              : "=x" (__t.__f)                                           \
 287              : "x" ((USItype) (u)),                                     \
 288                "x" ((USItype) (v)));                                    \
 289     (w1) = __t.__w1w0.__w1;                                             \
 290     (w0) = __t.__w1w0.__w0;                                             \
 291      } while (0)
 292 #define UMUL_TIME 8
 293 #else
 294 #define UMUL_TIME 30
 295 #endif
 296 #define UDIV_TIME 40
     /* Binary search for the top set bit, as annotated in the asm text: the
        count starts at 1, gains 16/8/4/2 whenever the upper part under test
        is zero, and finally has the remaining bit 1 subtracted.  X is tied
        to the scratch output __tmp (constraint "1"), so the shifting is done
        on a copy and the caller's X is left untouched.  */
 297 #define count_leading_zeros(count, x) \
 298   do {                                                                  \
 299     USItype __tmp;                                                      \
 300     __asm__ (                                                           \
 301        "ldi             1,%0\n"                                         \
 302 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
 303 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
 304 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
 305 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
 306 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
 307 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
 308 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
 309 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
 310 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
 311 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
 312 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
 313 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
 314 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
 315 "       sub             %0,%1,%0                ; Subtract it.\n"       \
 316         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
 317   } while (0)
 318 #endif
 319
 320 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
 321 #if !defined (__zarch__)
     /* Signed 32x32->64 multiply.  The 64-bit __x.__ll occupies a register
        pair; "lr %N0,%1" copies M0 into one register of that pair
        (presumably the odd one -- %N0 is not explained in this file) and
        "mr" multiplies by M1.  The product is read back through the union:
        __h (first member) is the high word, __l the low word.  */
 322 #define smul_ppmm(xh, xl, m0, m1) \
 323   do {                                                                  \
 324     union {DItype __ll;                                                 \
 325            struct {USItype __h, __l;} __i;                              \
 326           } __x;                                                        \
 327     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
 328              : "=&r" (__x.__ll)                                         \
 329              : "r" (m0), "r" (m1));                                     \
 330     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
 331   } while (0)
     /* Signed 64/32 division.  N1 is stored as the high word (__h) and N0 as
        the low word (__l) of the dividend pair; after "dr" the quotient is
        taken from __l and the remainder from __h.  */
 332 #define sdiv_qrnnd(q, r, n1, n0, d) \
 333   do {                                                                  \
 334     union {DItype __ll;                                                 \
 335            struct {USItype __h, __l;} __i;                              \
 336           } __x;                                                        \
 337     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
 338     __asm__ ("dr %0,%2"                                                 \
 339              : "=r" (__x.__ll)                                          \
 340              : "0" (__x.__ll), "r" (d));                                \
 341     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
 342   } while (0)
 343 #else
 344 #define smul_ppmm(xh, xl, m0, m1) \
     /* z/Architecture variant: the even/odd pair r0:r1 is pinned with      \
        explicit register variables.  M0 is loaded into r1 and "mr"         \
        multiplies it by M1.  The high product word is taken from r0 and    \
        the low word from r1, matching the non-zarch variant, which reads   \
        the high word from the first register of the pair.  The locals      \
        carry a "__" prefix so they cannot capture a caller argument        \
        that happens to be named r0 or r1.  */                              \
 345   do {                                                                  \
 346     register SItype __r0 __asm__ ("0");                                 \
 347     register SItype __r1 __asm__ ("1") = (m0);                          \
 348                                                                         \
 349     __asm__ ("mr\t%%r0,%3"                                              \
 350              : "=r" (__r0), "=r" (__r1)                                 \
 351              : "r"  (__r1),  "r" (m1));                                 \
 352     (xh) = __r0; (xl) = __r1;                                           \
 353   } while (0)
 354 #define sdiv_qrnnd(q, r, n1, n0, d) \
     /* z/Architecture variant: the dividend pair r0:r1 is pinned with      \
        explicit register variables, high word N1 in r0 and low word N0 in  \
        r1, as in the non-zarch variant.  "dr" divides the pair by D, which \
        is asm operand 4 (operands 2 and 3 are the r0/r1 inputs).  The      \
        quotient is then read from r1 and the remainder from r0.  The       \
        locals carry a "__" prefix so they cannot capture a caller          \
        argument that happens to be named r0 or r1.  */                     \
 355   do {                                                                  \
 356     register SItype __r0 __asm__ ("0") = (n1);                          \
 357     register SItype __r1 __asm__ ("1") = (n0);                          \
 358                                                                         \
 359     __asm__ ("dr\t%%r0,%4"                                              \
 360              : "=r" (__r0), "=r" (__r1)                                 \
 361              : "r" (__r0), "r" (__r1), "r" (d));                        \
 362     (q) = __r1; (r) = __r0;                                             \
 363   } while (0)
 364 #endif /* __zarch__ */
 365 #endif
 366
 367 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
     /* x86, 32-bit words.  The {a|b} groups in the templates presumably
        select between the two assembler dialects (operand order differs).
        Two-word add and subtract: the outputs are tied to AH/AL through the
        "0"/"1" matching constraints, so the instructions update them in
        place -- low words first (operand 5 into SL), then the high words
        with adc/sbb (operand 3 into SH).  */
 368 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 369   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
 370            : "=r" ((USItype) (sh)),                                     \
 371              "=&r" ((USItype) (sl))                                     \
 372            : "%0" ((USItype) (ah)),                                     \
 373              "g" ((USItype) (bh)),                                      \
 374              "%1" ((USItype) (al)),                                     \
 375              "g" ((USItype) (bl)))
 376 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 377   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
 378            : "=r" ((USItype) (sh)),                                     \
 379              "=&r" ((USItype) (sl))                                     \
 380            : "0" ((USItype) (ah)),                                      \
 381              "g" ((USItype) (bh)),                                      \
 382              "1" ((USItype) (al)),                                      \
 383              "g" ((USItype) (bl)))
     /* mul takes U in the "a" register (tied to output 0) and leaves the low
        product word there (W0) and the high word in the "d" register (W1).
        div takes N0 in "a" and N1 in "d" and leaves the quotient in "a" and
        the remainder in "d".  */
 384 #define umul_ppmm(w1, w0, u, v) \
 385   __asm__ ("mul{l} %3"                                                  \
 386            : "=a" ((USItype) (w0)),                                     \
 387              "=d" ((USItype) (w1))                                      \
 388            : "%0" ((USItype) (u)),                                      \
 389              "rm" ((USItype) (v)))
 390 #define udiv_qrnnd(q, r, n1, n0, dv) \
 391   __asm__ ("div{l} %4"                                                  \
 392            : "=a" ((USItype) (q)),                                      \
 393              "=d" ((USItype) (r))                                       \
 394            : "0" ((USItype) (n0)),                                      \
 395              "1" ((USItype) (n1)),                                      \
 396              "rm" ((USItype) (dv)))
 397 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
 398 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
 399 #define UMUL_TIME 40
 400 #define UDIV_TIME 40
 401 #endif /* 80x86 */
 402
 403 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
     /* x86 with 64-bit words: the same scheme as the 32-bit x86 macros, with
        UDItype operands and the {q} instruction suffix.  Note that the
        condition also admits __i386__ when W_TYPE_SIZE is 64.  Outputs of
        add/sub are tied to AH/AL via "0"/"1"; mul and div use the "a" and
        "d" registers for the low/high (quotient/remainder) words.  */
 404 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 405   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
 406            : "=r" ((UDItype) (sh)),                                     \
 407              "=&r" ((UDItype) (sl))                                     \
 408            : "%0" ((UDItype) (ah)),                                     \
 409              "rme" ((UDItype) (bh)),                                    \
 410              "%1" ((UDItype) (al)),                                     \
 411              "rme" ((UDItype) (bl)))
 412 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 413   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
 414            : "=r" ((UDItype) (sh)),                                     \
 415              "=&r" ((UDItype) (sl))                                     \
 416            : "0" ((UDItype) (ah)),                                      \
 417              "rme" ((UDItype) (bh)),                                    \
 418              "1" ((UDItype) (al)),                                      \
 419              "rme" ((UDItype) (bl)))
 420 #define umul_ppmm(w1, w0, u, v) \
 421   __asm__ ("mul{q} %3"                                                  \
 422            : "=a" ((UDItype) (w0)),                                     \
 423              "=d" ((UDItype) (w1))                                      \
 424            : "%0" ((UDItype) (u)),                                      \
 425              "rm" ((UDItype) (v)))
 426 #define udiv_qrnnd(q, r, n1, n0, dv) \
 427   __asm__ ("div{q} %4"                                                  \
 428            : "=a" ((UDItype) (q)),                                      \
 429              "=d" ((UDItype) (r))                                       \
 430            : "0" ((UDItype) (n0)),                                      \
 431              "1" ((UDItype) (n1)),                                      \
 432              "rm" ((UDItype) (dv)))
 433 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
     /* Both counts use the "ll" builtins.  The operand is a 64-bit word in
        this section, and the enclosing condition also admits targets where
        long is only 32 bits (it accepts __i386__), where the "l" builtins
        would drop the upper half of X.  Where long is 64 bits the result
        is unchanged.  */
 434 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
 435 #define UMUL_TIME 40
 436 #define UDIV_TIME 40
 437 #endif /* x86_64 */
 438
 439 #if defined (__i960__) && W_TYPE_SIZE == 32
     /* i960, 32-bit words.  emul forms the full 64-bit product of U and V.
        umul_ppmm reads it back through a union whose first member is the
        low word (__l) and second the high word (__h); __umulsidi3 simply
        yields the 64-bit value.  Both are GNU statement expressions.  */
 440 #define umul_ppmm(w1, w0, u, v) \
 441   ({union {UDItype __ll;                                                \
 442            struct {USItype __l, __h;} __i;                              \
 443           } __xx;                                                       \
 444   __asm__ ("emul        %2,%1,%0"                                       \
 445            : "=d" (__xx.__ll)                                           \
 446            : "%dI" ((USItype) (u)),                                     \
 447              "dI" ((USItype) (v)));                                     \
 448   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 449 #define __umulsidi3(u, v) \
 450   ({UDItype __w;                                                        \
 451     __asm__ ("emul      %2,%1,%0"                                       \
 452              : "=d" (__w)                                               \
 453              : "%dI" ((USItype) (u)),                                   \
 454                "dI" ((USItype) (v)));                                   \
 455     __w; })
 456 #endif /* __i960__ */
 457
 458 #if defined (__ia64) && W_TYPE_SIZE == 64
 459 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
 460    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
 461    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
 462    register, which takes an extra cycle.  */
     /* The low difference is computed into a temporary first and stored to
        SL last, so SL may name the same object as AL or BL.  Note that AL
        and BL are each evaluated twice.  */
 463 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
 464   do {                                                                  \
 465     UWtype __x;                                                         \
 466     __x = (al) - (bl);                                                  \
 467     if ((al) < (bl))                                                    \
 468       (sh) = (ah) - (bh) - 1;                                           \
 469     else                                                                \
 470       (sh) = (ah) - (bh);                                               \
 471     (sl) = __x;                                                         \
 472   } while (0)
 473
 474 /* Do both product parts in assembly, since that gives better code with
 475    all gcc versions.  Some callers will just use the upper part, and in
 476    that situation we waste an instruction, but not any cycles.  */
 477 #define umul_ppmm(ph, pl, m0, m1)                                       \
 478   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
 479            : "=&f" (ph), "=f" (pl)                                      \
 480            : "f" (m0), "f" (m1))
     /* NOTE(review): presumably "mux1 ... @rev" byte-reverses X and czx1.l
        locates a zero byte in (-_y | _y), so that (_a - 1) * 8 is the bit
        offset of the most significant nonzero byte of X -- confirm against
        the Itanium manual.  The C statements after the asm shift X down by
        that offset, narrow within the byte in steps of 4, 2 and 1 bits, and
        return W_TYPE_SIZE - 1 minus the resulting top-bit position.  */
 481 #define count_leading_zeros(count, x)                                   \
 482   do {                                                                  \
 483     UWtype _x = (x), _y, _a, _c;                                        \
 484     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
 485     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
 486     _c = (_a - 1) << 3;                                                 \
 487     _x >>= _c;                                                          \
 488     if (_x >= 1 << 4)                                                   \
 489       _x >>= 4, _c += 4;                                                \
 490     if (_x >= 1 << 2)                                                   \
 491       _x >>= 2, _c += 2;                                                \
 492     _c += _x >> 1;                                                      \
 493     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
 494   } while (0)
 495 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
 496    based, and we don't need a special case for x==0 here */
     /* (x - 1) & ~x has ones exactly in the trailing-zero positions of X, so
        its population count is the number of trailing zeros.  */
 497 #define count_trailing_zeros(count, x)                                  \
 498   do {                                                                  \
 499     UWtype __ctz_x = (x);                                               \
 500     __asm__ ("popcnt %0 = %1"                                           \
 501              : "=r" (count)                                             \
 502              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
 503   } while (0)
 504 #define UMUL_TIME 14
 505 #endif
 506
 507 #if defined (__M32R__) && W_TYPE_SIZE == 32
     /* M32R, 32-bit words.  Both macros tie the outputs to AH/AL ("0"/"1")
        and update them in place: after the condition bit has been cleared,
        the first addx/subx folds BL (operand 5) into SL and the second folds
        BH (operand 3) into SH.  "cbit" is listed as clobbered because the
        sequence rewrites the condition bit.  */
 508 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 509   /* The cmp clears the condition bit.  */ \
 510   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
 511            : "=r" ((USItype) (sh)),                                     \
 512              "=&r" ((USItype) (sl))                                     \
 513            : "0" ((USItype) (ah)),                                      \
 514              "r" ((USItype) (bh)),                                      \
 515              "1" ((USItype) (al)),                                      \
 516              "r" ((USItype) (bl))                                       \
 517            : "cbit")
 518 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 519   /* The cmp clears the condition bit.  */ \
 520   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
 521            : "=r" ((USItype) (sh)),                                     \
 522              "=&r" ((USItype) (sl))                                     \
 523            : "0" ((USItype) (ah)),                                      \
 524              "r" ((USItype) (bh)),                                      \
 525              "1" ((USItype) (al)),                                      \
 526              "r" ((USItype) (bl))                                       \
 527            : "cbit")
 528 #endif /* __M32R__ */
 529
 530 #if defined (__mc68000__) && W_TYPE_SIZE == 32
     /* m68k, 32-bit words.  Two-word add and subtract: the outputs are tied
        to AH/AL ("0"/"1") and updated in place -- add/sub folds BL
        (operand 5) into SL, then addx/subx folds BH (operand 3) into SH.
        BH is restricted to a "d" register while BL may be any general
        operand ("g").  */
 531 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 532   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 533            : "=d" ((USItype) (sh)),                                     \
 534              "=&d" ((USItype) (sl))                                     \
 535            : "%0" ((USItype) (ah)),                                     \
 536              "d" ((USItype) (bh)),                                      \
 537              "%1" ((USItype) (al)),                                     \
 538              "g" ((USItype) (bl)))
 539 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 540   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 541            : "=d" ((USItype) (sh)),                                     \
 542              "=&d" ((USItype) (sl))                                     \
 543            : "0" ((USItype) (ah)),                                      \
 544              "d" ((USItype) (bh)),                                      \
 545              "1" ((USItype) (al)),                                      \
 546              "g" ((USItype) (bl)))
 547
 548 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
 549 #if (defined (__mc68020__) && !defined (__mc68060__))
 550 #define umul_ppmm(w1, w0, u, v) \
 551   __asm__ ("mulu%.l %3,%1:%0"                                           \
 552            : "=d" ((USItype) (w0)),                                     \
 553              "=d" ((USItype) (w1))                                      \
 554            : "%0" ((USItype) (u)),                                      \
 555              "dmi" ((USItype) (v)))
 556 #define UMUL_TIME 45
 557 #define udiv_qrnnd(q, r, n1, n0, d) \
 558   __asm__ ("divu%.l %4,%1:%0"                                           \
 559            : "=d" ((USItype) (q)),                                      \
 560              "=d" ((USItype) (r))                                       \
 561            : "0" ((USItype) (n0)),                                      \
 562              "1" ((USItype) (n1)),                                      \
 563              "dmi" ((USItype) (d)))
 564 #define UDIV_TIME 90
 565 #define sdiv_qrnnd(q, r, n1, n0, d) \
 566   __asm__ ("divs%.l %4,%1:%0"                                           \
 567            : "=d" ((USItype) (q)),                                      \
 568              "=d" ((USItype) (r))                                       \
 569            : "0" ((USItype) (n0)),                                      \
 570              "1" ((USItype) (n1)),                                      \
 571              "dmi" ((USItype) (d)))
 572
 573 #elif defined (__mcoldfire__) /* not mc68020 */
 574
 575 #define umul_ppmm(xh, xl, a, b) \
 576   __asm__ ("| Inlined umul_ppmm\n"                                      \
 577            "    move%.l %2,%/d0\n"                                      \
 578            "    move%.l %3,%/d1\n"                                      \
 579            "    move%.l %/d0,%/d2\n"                                    \
 580            "    swap    %/d0\n"                                         \
 581            "    move%.l %/d1,%/d3\n"                                    \
 582            "    swap    %/d1\n"                                         \
 583            "    move%.w %/d2,%/d4\n"                                    \
 584            "    mulu    %/d3,%/d4\n"                                    \
 585            "    mulu    %/d1,%/d2\n"                                    \
 586            "    mulu    %/d0,%/d3\n"                                    \
 587            "    mulu    %/d0,%/d1\n"                                    \
 588            "    move%.l %/d4,%/d0\n"                                    \
 589            "    clr%.w  %/d0\n"                                         \
 590            "    swap    %/d0\n"                                         \
 591            "    add%.l  %/d0,%/d2\n"                                    \
 592            "    add%.l  %/d3,%/d2\n"                                    \
 593            "    jcc     1f\n"                                           \
 594            "    add%.l  %#65536,%/d1\n"                                 \
 595            "1:  swap    %/d2\n"                                         \
 596            "    moveq   %#0,%/d0\n"                                     \
 597            "    move%.w %/d2,%/d0\n"                                    \
 598            "    move%.w %/d4,%/d2\n"                                    \
 599            "    move%.l %/d2,%1\n"                                      \
 600            "    add%.l  %/d1,%/d0\n"                                    \
 601            "    move%.l %/d0,%0"                                        \
 602            : "=g" ((USItype) (xh)),                                     \
 603              "=g" ((USItype) (xl))                                      \
 604            : "g" ((USItype) (a)),                                       \
 605              "g" ((USItype) (b))                                        \
 606            : "d0", "d1", "d2", "d3", "d4")
 607 #define UMUL_TIME 100
 608 #define UDIV_TIME 400
 609 #else /* not ColdFire */
 610 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
 611 #define umul_ppmm(xh, xl, a, b) \
 612   __asm__ ("| Inlined umul_ppmm\n"                                      \
 613            "    move%.l %2,%/d0\n"                                      \
 614            "    move%.l %3,%/d1\n"                                      \
 615            "    move%.l %/d0,%/d2\n"                                    \
 616            "    swap    %/d0\n"                                         \
 617            "    move%.l %/d1,%/d3\n"                                    \
 618            "    swap    %/d1\n"                                         \
 619            "    move%.w %/d2,%/d4\n"                                    \
 620            "    mulu    %/d3,%/d4\n"                                    \
 621            "    mulu    %/d1,%/d2\n"                                    \
 622            "    mulu    %/d0,%/d3\n"                                    \
 623            "    mulu    %/d0,%/d1\n"                                    \
 624            "    move%.l %/d4,%/d0\n"                                    \
 625            "    eor%.w  %/d0,%/d0\n"                                    \
 626            "    swap    %/d0\n"                                         \
 627            "    add%.l  %/d0,%/d2\n"                                    \
 628            "    add%.l  %/d3,%/d2\n"                                    \
 629            "    jcc     1f\n"                                           \
 630            "    add%.l  %#65536,%/d1\n"                                 \
 631            "1:  swap    %/d2\n"                                         \
 632            "    moveq   %#0,%/d0\n"                                     \
 633            "    move%.w %/d2,%/d0\n"                                    \
 634            "    move%.w %/d4,%/d2\n"                                    \
 635            "    move%.l %/d2,%1\n"                                      \
 636            "    add%.l  %/d1,%/d0\n"                                    \
 637            "    move%.l %/d0,%0"                                        \
 638            : "=g" ((USItype) (xh)),                                     \
 639              "=g" ((USItype) (xl))                                      \
 640            : "g" ((USItype) (a)),                                       \
 641              "g" ((USItype) (b))                                        \
 642            : "d0", "d1", "d2", "d3", "d4")
 643 #define UMUL_TIME 100
 644 #define UDIV_TIME 400
 645
 646 #endif /* not mc68020 */
 647
 648 /* The '020, '030, '040 and '060 have bitfield insns.
 649    cpu32 disguises as a 68020, but lacks them.  */
 650 #if defined (__mc68020__) && !defined (__mcpu32__)
 651 #define count_leading_zeros(count, x) \
 652   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 653            : "=d" ((USItype) (count))                                   \
 654            : "od" ((USItype) (x)), "n" (0))
 655 /* Some ColdFire architectures have a ff1 instruction supported via
 656    __builtin_clz. */
 657 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 658 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 659 #define COUNT_LEADING_ZEROS_0 32
 660 #endif
 661 #endif /* mc68000 */
 662
 663 #if defined (__m88000__) && W_TYPE_SIZE == 32
     /* Motorola 88000.  Two-word add/subtract: the low words use the .co
        form and the high words the .ci form of addu/subu.  Operands accept
        "rJ" and are printed with %r.  NOTE(review): presumably so that a
        constant zero can be emitted as r0 -- confirm against the m88k
        backend.  */
 664 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 665   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 666            : "=r" ((USItype) (sh)),                                     \
 667              "=&r" ((USItype) (sl))                                     \
 668            : "%rJ" ((USItype) (ah)),                                    \
 669              "rJ" ((USItype) (bh)),                                     \
 670              "%rJ" ((USItype) (al)),                                    \
 671              "rJ" ((USItype) (bl)))
 672 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 673   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 674            : "=r" ((USItype) (sh)),                                     \
 675              "=&r" ((USItype) (sl))                                     \
 676            : "rJ" ((USItype) (ah)),                                     \
 677              "rJ" ((USItype) (bh)),                                     \
 678              "rJ" ((USItype) (al)),                                     \
 679              "rJ" ((USItype) (bl)))
     /* count_leading_zeros: the ff1 result is XORed with 31 to turn it into
        a leading-zero count.  */
 680 #define count_leading_zeros(count, x) \
 681   do {                                                                  \
 682     USItype __cbtmp;                                                    \
 683     __asm__ ("ff1 %0,%1"                                                \
 684              : "=r" (__cbtmp)                                           \
 685              : "r" ((USItype) (x)));                                    \
 686     (count) = __cbtmp ^ 31;                                             \
 687   } while (0)
     /* 63 is what the macro above yields if ff1 returns 32 for a zero
        operand (32 ^ 31 == 63).  NOTE(review): presumably that is the
        hardware behaviour -- confirm.  */
 688 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 689 #if defined (__mc88110__)
     /* 88110 only.  umul_ppmm: mulu.d writes a 64-bit result into __xx.__ll;
        the halves are read back through the overlaid struct, whose first
        member is the high word.  */
 690 #define umul_ppmm(wh, wl, u, v) \
 691   do {                                                                  \
 692     union {UDItype __ll;                                                \
 693            struct {USItype __h, __l;} __i;                              \
 694           } __xx;                                                       \
 695     __asm__ ("mulu.d    %0,%1,%2"                                       \
 696              : "=r" (__xx.__ll)                                         \
 697              : "r" ((USItype) (u)),                                     \
 698                "r" ((USItype) (v)));                                    \
 699     (wh) = __xx.__i.__h;                                                \
 700     (wl) = __xx.__i.__l;                                                \
 701   } while (0)
     /* udiv_qrnnd: n1:n0 is packed into a 64-bit value and divided by d with
        divu.d, which yields only the quotient; the remainder is then
        computed as n0 - q * d.  Note that n0 and d each appear twice in the
        expansion, so arguments with side effects are evaluated more than
        once.  */
 702 #define udiv_qrnnd(q, r, n1, n0, d) \
 703   ({union {UDItype __ll;                                                \
 704            struct {USItype __h, __l;} __i;                              \
 705           } __xx;                                                       \
 706   USItype __q;                                                          \
 707   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 708   __asm__ ("divu.d %0,%1,%2"                                            \
 709            : "=r" (__q)                                                 \
 710            : "r" (__xx.__ll),                                           \
 711              "r" ((USItype) (d)));                                      \
 712   (r) = (n0) - __q * (d); (q) = __q; })
 713 #define UMUL_TIME 5
 714 #define UDIV_TIME 25
 715 #else
     /* Without __mc88110__ no multiply or divide macros are defined in this
        section; only the timing constants are.  */
 716 #define UMUL_TIME 17
 717 #define UDIV_TIME 150
 718 #endif /* __mc88110__ */
 719 #endif /* __m88000__ */
 720
 721 #if defined (__mips__) && W_TYPE_SIZE == 32
     /* MIPS, 32-bit words.  The widening multiply is written in plain C:
        both operands are truncated to USItype, the first is widened to
        UDItype so the product is formed in 64 bits, and the two halves are
        then stored to w1 (high) and w0 (low).  */
 722 #define umul_ppmm(w1, w0, u, v)                                         \
 723   do {                                                                  \
 724     UDItype __umul_prod = (UDItype) (USItype) (u) * (USItype) (v);      \
 725     (w1) = (USItype) (__umul_prod >> 32);                               \
 726     (w0) = (USItype) (__umul_prod);                                     \
 727   } while (0)
 728 #define UMUL_TIME 10
 729 #define UDIV_TIME 100
 730
     /* When __mips is 32 or 64 and __mips16 is not set, count_leading_zeros
        maps straight onto __builtin_clz.  */
 731 #if (__mips == 32 || __mips == 64) && ! __mips16
 732 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
 733 #define COUNT_LEADING_ZEROS_0 32
 734 #endif
 735 #endif /* __mips__ */
 736
 737 #if defined (__ns32000__) && W_TYPE_SIZE == 32
     /* NS32000.  umul_ppmm: meid leaves a 64-bit product in __xx.__ll, whose
        register or memory location is shared with u (constraint "%0").  The
        halves are read through the overlaid struct, whose first member is
        the low word.  */
 738 #define umul_ppmm(w1, w0, u, v) \
 739   ({union {UDItype __ll;                                                \
 740            struct {USItype __l, __h;} __i;                              \
 741           } __xx;                                                       \
 742   __asm__ ("meid %2,%0"                                                 \
 743            : "=g" (__xx.__ll)                                           \
 744            : "%0" ((USItype) (u)),                                      \
 745              "g" ((USItype) (v)));                                      \
 746   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
     /* Same meid multiply, returning the whole 64-bit product as the value
        of the statement expression.  */
 747 #define __umulsidi3(u, v) \
 748   ({UDItype __w;                                                        \
 749     __asm__ ("meid %2,%0"                                               \
 750              : "=g" (__w)                                               \
 751              : "%0" ((USItype) (u)),                                    \
 752                "g" ((USItype) (v)));                                    \
 753     __w; })
     /* udiv_qrnnd: n1:n0 is packed into __xx and divided in place by deid;
        afterwards the remainder is taken from the low member and the
        quotient from the high member.  */
 754 #define udiv_qrnnd(q, r, n1, n0, d) \
 755   ({union {UDItype __ll;                                                \
 756            struct {USItype __l, __h;} __i;                              \
 757           } __xx;                                                       \
 758   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 759   __asm__ ("deid %2,%0"                                                 \
 760            : "=g" (__xx.__ll)                                           \
 761            : "0" (__xx.__ll),                                           \
 762              "g" ((USItype) (d)));                                      \
 763   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
     /* count_trailing_zeros: ffsd with the result register preloaded with 0
        through the matching constraint "0".  */
 764 #define count_trailing_zeros(count,x) \
 765   do {                                                                  \
 766     __asm__ ("ffsd     %2,%0"                                           \
 767             : "=r" ((USItype) (count))                                  \
 768             : "0" ((USItype) 0),                                        \
 769               "r" ((USItype) (x)));                                     \
 770   } while (0)
 771 #endif /* __ns32000__ */
 772
 773 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 774    system vendor compilers.
 775    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 776    enough, since that hits ARM and m68k too.  */
 777 #if (defined (_ARCH_PPC)        /* AIX */                               \
 778      || defined (_ARCH_PWR)     /* AIX */                               \
 779      || defined (_ARCH_COM)     /* AIX */                               \
 780      || defined (__powerpc__)   /* gcc */                               \
 781      || defined (__POWERPC__)   /* BEOS */                              \
 782      || defined (__ppc__)       /* Darwin */                            \
 783      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 784      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 785          && CPU_FAMILY == PPC)                                                \
 786      ) && W_TYPE_SIZE == 32
     /* 32-bit POWER / PowerPC.  The {old|new} pairs in the asm templates are
        two assembler-dialect spellings of the same instruction.

        add_ssaaaa: (sh, sl) = (ah, al) + (bh, bl).  The low words are added
        with a carry-setting add.  The high word then uses addze when bh is
        the compile-time constant 0, addme when it is the constant ~0, and
        adde with bh in a register otherwise.  */
 787 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 788   do {                                                                  \
 789     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 790       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 791              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 792     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 793       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 794              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 795     else                                                                \
 796       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 797              : "=r" (sh), "=&r" (sl)                                    \
 798              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 799   } while (0)
     /* sub_ddmmss: (sh, sl) = (ah, al) - (bh, bl).  The low words always use
        a carry-setting subtract-from.  The high word is specialised when ah
        or bh is the compile-time constant 0 or ~0 (subfze, subfme, addme,
        addze respectively) and uses subfe in the general case.  */
 800 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 801   do {                                                                  \
 802     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 803       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 804                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 805     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 806       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 807                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 808     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 809       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 810                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 811     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 812       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 813                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 814     else                                                                \
 815       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 816                : "=r" (sh), "=&r" (sl)                                  \
 817                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 818   } while (0)
 819 #define count_leading_zeros(count, x) \
 820   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 821 #define COUNT_LEADING_ZEROS_0 32
 822 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 823   || defined (__ppc__)                                                    \
 824   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 825   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 826          && CPU_FAMILY == PPC)
     /* umul_ppmm: the high word comes from mulhwu and the low word from an
        ordinary C multiply.  The asm is fed the __m0/__m1 copies rather than
        the raw macro arguments, so each argument is expanded exactly once
        and is converted to USItype before it reaches the instruction.  */
 827 #define umul_ppmm(ph, pl, m0, m1) \
 828   do {                                                                  \
 829     USItype __m0 = (m0), __m1 = (m1);                                   \
 830     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 831     (pl) = __m0 * __m1;                                                 \
 832   } while (0)
 833 #define UMUL_TIME 15
     /* smul_ppmm: signed counterpart using mulhw, with the same
        single-expansion treatment of the arguments (converted to SItype).  */
 834 #define smul_ppmm(ph, pl, m0, m1) \
 835   do {                                                                  \
 836     SItype __m0 = (m0), __m1 = (m1);                                    \
 837     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 838     (pl) = __m0 * __m1;                                                 \
 839   } while (0)
 840 #define SMUL_TIME 14
 841 #define UDIV_TIME 120
 842 #elif defined (_ARCH_PWR)
     /* POWER (_ARCH_PWR) only: mul and div each produce a second result
        through the "q" constraint.  For sdiv_qrnnd, nl is tied to that same
        operand by the matching constraint "1".  */
 843 #define UMUL_TIME 8
 844 #define smul_ppmm(xh, xl, m0, m1) \
 845   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 846 #define SMUL_TIME 4
 847 #define sdiv_qrnnd(q, r, nh, nl, d) \
 848   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 849 #define UDIV_TIME 100
 850 #endif
 851 #endif /* 32-bit POWER architecture variants.  */
 852
 853 /* We should test _IBMR2 here when we add assembly support for the system
 854    vendor compilers.  */
 855 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
     /* 64-bit PowerPC.  Same structure as the 32-bit macros, with UDItype
        words.  The {old|new} pairs in the asm templates are two
        assembler-dialect spellings of the same instruction.

        add_ssaaaa: (sh, sl) = (ah, al) + (bh, bl).  The high word uses addze
        when bh is the compile-time constant 0, addme when it is the constant
        ~0, and adde otherwise.  */
 856 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 857   do {                                                                  \
 858     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 859       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 860              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 861     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 862       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 863              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 864     else                                                                \
 865       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 866              : "=r" (sh), "=&r" (sl)                                    \
 867              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 868   } while (0)
     /* sub_ddmmss: (sh, sl) = (ah, al) - (bh, bl).  The high word is
        specialised when ah or bh is the compile-time constant 0 or ~0
        (subfze, subfme, addme, addze respectively) and uses subfe in the
        general case.  */
 869 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 870   do {                                                                  \
 871     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 872       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 873                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 874     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 875       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 876                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 877     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 878       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 879                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 880     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 881       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 882                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 883     else                                                                \
 884       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 885                : "=r" (sh), "=&r" (sl)                                  \
 886                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 887   } while (0)
 888 #define count_leading_zeros(count, x) \
 889   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 890 #define COUNT_LEADING_ZEROS_0 64
     /* umul_ppmm: the high word comes from mulhdu and the low word from an
        ordinary C multiply.  The asm is fed the __m0/__m1 copies rather than
        the raw macro arguments, so each argument is expanded exactly once
        and is converted to UDItype before it reaches the instruction.  */
 891 #define umul_ppmm(ph, pl, m0, m1) \
 892   do {                                                                  \
 893     UDItype __m0 = (m0), __m1 = (m1);                                   \
 894     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 895     (pl) = __m0 * __m1;                                                 \
 896   } while (0)
 897 #define UMUL_TIME 15
     /* smul_ppmm: signed counterpart using mulhd, with the same
        single-expansion treatment of the arguments (converted to DItype).  */
 898 #define smul_ppmm(ph, pl, m0, m1) \
 899   do {                                                                  \
 900     DItype __m0 = (m0), __m1 = (m1);                                    \
 901     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 902     (pl) = __m0 * __m1;                                                 \
 903   } while (0)
 904 #define SMUL_TIME 14  /* ??? */
 905 #define UDIV_TIME 120 /* ??? */
 906 #endif /* 64-bit PowerPC.  */
 907
 908 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
     /* IBM RT/ROMP.  Two-word add/subtract: a/s on the low words followed by
        ae/se on the high words, with the results tied to (ah, al) through
        matching constraints.  */
 909 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 910   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 911            : "=r" ((USItype) (sh)),                                     \
 912              "=&r" ((USItype) (sl))                                     \
 913            : "%0" ((USItype) (ah)),                                     \
 914              "r" ((USItype) (bh)),                                      \
 915              "%1" ((USItype) (al)),                                     \
 916              "r" ((USItype) (bl)))
 917 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 918   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 919            : "=r" ((USItype) (sh)),                                     \
 920              "=&r" ((USItype) (sl))                                     \
 921            : "0" ((USItype) (ah)),                                      \
 922              "r" ((USItype) (bh)),                                      \
 923              "1" ((USItype) (al)),                                      \
 924              "r" ((USItype) (bl)))
     /* umul_ppmm: r2 is zeroed, __m0 is moved to r10 with mts, and sixteen
        "m r2,%3" steps are issued.  The high word is then copied out of r2
        and the low word read back from r10 with mfs.  Only r2 is named as a
        clobber.  The C statement after the asm adds __m1 to ph when __m0 has
        its top bit set, and __m0 when __m1 does.  NOTE(review): this looks
        like the usual signed-to-unsigned high-word correction, which would
        mean the multiply steps form a signed product -- confirm.  */
 925 #define umul_ppmm(ph, pl, m0, m1) \
 926   do {                                                                  \
 927     USItype __m0 = (m0), __m1 = (m1);                                   \
 928     __asm__ (                                                           \
 929        "s       r2,r2\n"                                                \
 930 "       mts     r10,%2\n"                                               \
 931 "       m       r2,%3\n"                                                \
 932 "       m       r2,%3\n"                                                \
 933 "       m       r2,%3\n"                                                \
 934 "       m       r2,%3\n"                                                \
 935 "       m       r2,%3\n"                                                \
 936 "       m       r2,%3\n"                                                \
 937 "       m       r2,%3\n"                                                \
 938 "       m       r2,%3\n"                                                \
 939 "       m       r2,%3\n"                                                \
 940 "       m       r2,%3\n"                                                \
 941 "       m       r2,%3\n"                                                \
 942 "       m       r2,%3\n"                                                \
 943 "       m       r2,%3\n"                                                \
 944 "       m       r2,%3\n"                                                \
 945 "       m       r2,%3\n"                                                \
 946 "       m       r2,%3\n"                                                \
 947 "       cas     %0,r2,r0\n"                                             \
 948 "       mfs     r10,%1"                                                 \
 949              : "=r" ((USItype) (ph)),                                   \
 950                "=r" ((USItype) (pl))                                    \
 951              : "%r" (__m0),                                             \
 952                 "r" (__m1)                                              \
 953              : "r2");                                                   \
 954     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
 955              + (((SItype) __m1 >> 31) & __m0));                         \
 956   } while (0)
 957 #define UMUL_TIME 20
 958 #define UDIV_TIME 200
     /* count_leading_zeros: when x >= 0x10000 the clz instruction is applied
        to the upper half (x >> 16); otherwise it is applied to x itself and
        16 is added to the result.  x appears more than once in the
        expansion, so an argument with side effects is evaluated more than
        once.  */
 959 #define count_leading_zeros(count, x) \
 960   do {                                                                  \
 961     if ((x) >= 0x10000)                                                 \
 962       __asm__ ("clz     %0,%1"                                          \
 963                : "=r" ((USItype) (count))                               \
 964                : "r" ((USItype) (x) >> 16));                            \
 965     else                                                                \
 966       {                                                                 \
 967         __asm__ ("clz   %0,%1"                                          \
 968                  : "=r" ((USItype) (count))                             \
 969                  : "r" ((USItype) (x)));                                        \
 970         (count) += 16;                                                  \
 971       }                                                                 \
 972   } while (0)
 973 #endif
 974
 975 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
     /* SH (non-SHmedia).  umul_ppmm is provided unless __sh1__ is defined:
        dmulu.l multiplies u by v, then the low and high words are stored
        from macl and mach with sts.  Both mac registers are named as
        clobbers.  */
 976 #ifndef __sh1__
 977 #define umul_ppmm(w1, w0, u, v) \
 978   __asm__ (                                                             \
 979        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
 980            : "=r<" ((USItype)(w1)),                                     \
 981              "=r<" ((USItype)(w0))                                      \
 982            : "r" ((USItype)(u)),                                        \
 983              "r" ((USItype)(v))                                         \
 984            : "macl", "mach")
 985 #define UMUL_TIME 5
 986 #endif
 987
 988 /* This is the same algorithm as __udiv_qrnnd_c.  */
 989 #define UDIV_NEEDS_NORMALIZATION 1
 990
     /* udiv_qrnnd: declares the hidden-visibility helper __udiv_qrnnd_16 and
        calls it twice from the asm (the two jsr instructions), passing
        values in the fixed registers listed in the comment inside the macro.
        r is tied to n1 and constrained to "z" with an early clobber; r1, r2,
        r4, r5, r6, pr and t are named as clobbers.  NOTE(review): the
        instruction after each jsr presumably executes in its delay slot --
        confirm against the SH manual.  */
 991 #define udiv_qrnnd(q, r, n1, n0, d) \
 992   do {                                                                  \
 993     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
 994                         __attribute__ ((visibility ("hidden")));        \
 995     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
 996     __asm__ (                                                           \
 997         "mov%M4 %4,r5\n"                                                \
 998 "       swap.w %3,r4\n"                                                 \
 999 "       swap.w r5,r6\n"                                                 \
1000 "       jsr @%5\n"                                                      \
1001 "       shll16 r6\n"                                                    \
1002 "       swap.w r4,r4\n"                                                 \
1003 "       jsr @%5\n"                                                      \
1004 "       swap.w r1,%0\n"                                                 \
1005 "       or r1,%0"                                                       \
1006         : "=r" (q), "=&z" (r)                                           \
1007         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1008         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1009   } while (0)
1010
1011 #define UDIV_TIME 80
1012
     /* sub_ddmmss: clrt, then subc on the low words and subc on the high
        words.  The results are tied to (ah, al) by matching constraints and
        t is named as a clobber.  */
1013 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1014   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1015            : "=r" (sh), "=r" (sl)                                       \
1016            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1017
1018 #endif /* __sh__ */
1019
1020 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
     /* SH5 in SHmedia mode.

        __umulsidi3: 64-bit product of u and v, each truncated to USItype
        first.  Both arguments are parenthesized so that an expression
        argument such as a + b is converted and multiplied as a whole instead
        of having the casts bind to its first operand only.  */
1021 #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
     /* count_leading_zeros: x is truncated to USItype and widened to 64
        bits, nsb is applied to the widened value, and the result less 31 is
        stored in count.  */
1022 #define count_leading_zeros(count, x) \
1023   do                                                                    \
1024     {                                                                   \
1025       UDItype x_ = (USItype)(x);                                        \
1026       SItype c_;                                                        \
1027                                                                         \
1028       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1029       (count) = c_ - 31;                                                \
1030     }                                                                   \
1031   while (0)
1032 #define COUNT_LEADING_ZEROS_0 32
1033 #endif
1034
1035 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1036     && W_TYPE_SIZE == 32
1037 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
       /* Two-word add: addcc on the low words, then addx on the high.  */ \
       /* sl is early-clobbered; __CLOBBER_CC is defined outside this view.  */ \
1038   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1039            : "=r" ((USItype) (sh)),                                     \
1040              "=&r" ((USItype) (sl))                                     \
1041            : "%rJ" ((USItype) (ah)),                                    \
1042              "rI" ((USItype) (bh)),                                     \
1043              "%rJ" ((USItype) (al)),                                    \
1044              "rI" ((USItype) (bl))                                      \
1045            __CLOBBER_CC)
/* (sh, sl) = (ah, al) - (bh, bl) for 32-bit SPARC.
   subcc subtracts the low words (operand 5 from operand 4) into sl and
   sets the condition codes; subx then subtracts the high words (operand
   3 from operand 2) into sh together with the borrow.  sl is
   earlyclobber because it is written before the high-word inputs are
   read.  Unlike the addition macro, the inputs carry no `%' modifier,
   since subtraction is not commutative.  */
1046 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1047   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1048            : "=r" ((USItype) (sh)),                                     \
1049              "=&r" ((USItype) (sl))                                     \
1050            : "rJ" ((USItype) (ah)),                                     \
1051              "rI" ((USItype) (bh)),                                     \
1052              "rJ" ((USItype) (al)),                                     \
1053              "rI" ((USItype) (bl))                                      \
1054            __CLOBBER_CC)
1055 #if defined (__sparc_v8__)
/* (w1, w0) = u * v using the SPARC v8 hardware multiply.
   umul places its result word in w0 (operand 1); the other half of the
   product is then read back from the %y register into w1 (operand 0).  */
1056 #define umul_ppmm(w1, w0, u, v) \
1057   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1058            : "=r" ((USItype) (w1)),                                     \
1059              "=r" ((USItype) (w0))                                      \
1060            : "r" ((USItype) (u)),                                       \
1061              "r" ((USItype) (v)))
/* Divide the two-word value (__n1, __n0) by __d using the SPARC v8
   hardware divide, giving quotient __q and remainder __r.
   __n1 is moved into %y and three nops follow before udiv divides by
   __d into __q.  The remainder is then reconstructed as
   __n0 - __q * __d with umul and sub.  Both outputs are earlyclobber
   because inputs are still read after they are written.
   NOTE(review): presumably the nops cover the write-to-%y delay --
   confirm against the SPARC architecture manual.  */
1062 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1063   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1064            : "=&r" ((USItype) (__q)),                                   \
1065              "=&r" ((USItype) (__r))                                    \
1066            : "r" ((USItype) (__n1)),                                    \
1067              "r" ((USItype) (__n0)),                                    \
1068              "r" ((USItype) (__d)))
1069 #else
1070 #if defined (__sparclite__)
1071 /* This has hardware multiply but not divide.  It also has two additional
1072    instructions scan (ffs from high bit) and divscc.  */
/* (w1, w0) = u * v using the sparclite hardware multiply.
   umul places its result word in w0 (operand 1); the other half of the
   product is then read back from the %y register into w1 (operand 0).
   The instruction sequence is the same as the one used for SPARC v8.  */
1073 #define umul_ppmm(w1, w0, u, v) \
1074   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1075            : "=r" ((USItype) (w1)),                                     \
1076              "=r" ((USItype) (w0))                                      \
1077            : "r" ((USItype) (u)),                                       \
1078              "r" ((USItype) (v)))
/* Divide the two-word value (n1, n0) by d on sparclite, giving quotient
   q and remainder r, using a fully unrolled chain of divscc steps.
   n1 is written to %y, the condition codes are set by "tst %g0", and
   then 32 divscc instructions run: the first starts from n0, the
   intermediate ones accumulate in %g1, and the last one writes q.  The
   remainder is read from %y; the annulled "bl,a" adds d back to it only
   when the branch is taken.  %g1 and (via __AND_CLOBBER_CC, presumably)
   the condition codes are clobbered.
   UDIV_TIME matches the 37 instructions in the sequence
   (wr, tst, 32 divscc, rd, bl, add).
   NOTE(review): r (operand 1) is written before d (operand 4) is last
   read but is not marked earlyclobber -- confirm this cannot alias.  */
1079 #define udiv_qrnnd(q, r, n1, n0, d) \
1080   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1081 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1082 "       tst     %%g0\n"                                                 \
1083 "       divscc  %3,%4,%%g1\n"                                           \
1084 "       divscc  %%g1,%4,%%g1\n"                                         \
1085 "       divscc  %%g1,%4,%%g1\n"                                         \
1086 "       divscc  %%g1,%4,%%g1\n"                                         \
1087 "       divscc  %%g1,%4,%%g1\n"                                         \
1088 "       divscc  %%g1,%4,%%g1\n"                                         \
1089 "       divscc  %%g1,%4,%%g1\n"                                         \
1090 "       divscc  %%g1,%4,%%g1\n"                                         \
1091 "       divscc  %%g1,%4,%%g1\n"                                         \
1092 "       divscc  %%g1,%4,%%g1\n"                                         \
1093 "       divscc  %%g1,%4,%%g1\n"                                         \
1094 "       divscc  %%g1,%4,%%g1\n"                                         \
1095 "       divscc  %%g1,%4,%%g1\n"                                         \
1096 "       divscc  %%g1,%4,%%g1\n"                                         \
1097 "       divscc  %%g1,%4,%%g1\n"                                         \
1098 "       divscc  %%g1,%4,%%g1\n"                                         \
1099 "       divscc  %%g1,%4,%%g1\n"                                         \
1100 "       divscc  %%g1,%4,%%g1\n"                                         \
1101 "       divscc  %%g1,%4,%%g1\n"                                         \
1102 "       divscc  %%g1,%4,%%g1\n"                                         \
1103 "       divscc  %%g1,%4,%%g1\n"                                         \
1104 "       divscc  %%g1,%4,%%g1\n"                                         \
1105 "       divscc  %%g1,%4,%%g1\n"                                         \
1106 "       divscc  %%g1,%4,%%g1\n"                                         \
1107 "       divscc  %%g1,%4,%%g1\n"                                         \
1108 "       divscc  %%g1,%4,%%g1\n"                                         \
1109 "       divscc  %%g1,%4,%%g1\n"                                         \
1110 "       divscc  %%g1,%4,%%g1\n"                                         \
1111 "       divscc  %%g1,%4,%%g1\n"                                         \
1112 "       divscc  %%g1,%4,%%g1\n"                                         \
1113 "       divscc  %%g1,%4,%%g1\n"                                         \
1114 "       divscc  %%g1,%4,%0\n"                                           \
1115 "       rd      %%y,%1\n"                                               \
1116 "       bl,a 1f\n"                                                      \
1117 "       add     %1,%4,%1\n"                                             \
1118 "1:     ! End of inline udiv_qrnnd"                                     \
1119            : "=r" ((USItype) (q)),                                      \
1120              "=r" ((USItype) (r))                                       \
1121            : "r" ((USItype) (n1)),                                      \
1122              "r" ((USItype) (n0)),                                      \
1123              "rI" ((USItype) (d))                                       \
1124            : "g1" __AND_CLOBBER_CC)
1125 #define UDIV_TIME 37
/* Count the leading zero bits of X into COUNT with the sparclite "scan"
   instruction, invoked as "scan x,1,count".
   NOTE(review): presumably the literal 1 makes scan search for the first
   set bit from the high end -- confirm against the sparclite manual.
   The result for X == 0 is deliberately left unspecified; see the
   comment that follows this macro.  */
1126 #define count_leading_zeros(count, x) \
1127   do {                                                                  \
1128   __asm__ ("scan %1,1,%0"                                               \
1129            : "=r" ((USItype) (count))                                   \
1130            : "r" ((USItype) (x)));                                      \
1131   } while (0)
1132 /* Early sparclites return 63 for an argument of 0, but they warn that future
1133    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1134    undefined.  */
1135 #else
1136 /* SPARC without integer multiplication and divide instructions.
1137    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* (w1, w0) = u * v for SPARC without a hardware multiply, built from
   multiply-step instructions.
   u is written to %y, and %o5 is set to u masked by the sign of v
   ("sra v,31" then "and").  andcc clears %g1 and the condition codes,
   after which 32 mulscc steps with v, plus one final mulscc with 0,
   accumulate in %g1.  The high word w1 is %g1 + %o5 and the low word w0
   is read back from %y.  %g1, %o5 and (via __AND_CLOBBER_CC, presumably)
   the condition codes are clobbered.
   NOTE(review): the %o5 term presumably corrects for mulscc treating an
   operand as signed -- confirm against the SPARC architecture manual.
   UMUL_TIME matches the 39 instructions in the sequence.  */
1138 #define umul_ppmm(w1, w0, u, v) \
1139   __asm__ ("! Inlined umul_ppmm\n"                                      \
1140 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1141 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1142 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1143 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1144 "       mulscc  %%g1,%3,%%g1\n"                                         \
1145 "       mulscc  %%g1,%3,%%g1\n"                                         \
1146 "       mulscc  %%g1,%3,%%g1\n"                                         \
1147 "       mulscc  %%g1,%3,%%g1\n"                                         \
1148 "       mulscc  %%g1,%3,%%g1\n"                                         \
1149 "       mulscc  %%g1,%3,%%g1\n"                                         \
1150 "       mulscc  %%g1,%3,%%g1\n"                                         \
1151 "       mulscc  %%g1,%3,%%g1\n"                                         \
1152 "       mulscc  %%g1,%3,%%g1\n"                                         \
1153 "       mulscc  %%g1,%3,%%g1\n"                                         \
1154 "       mulscc  %%g1,%3,%%g1\n"                                         \
1155 "       mulscc  %%g1,%3,%%g1\n"                                         \
1156 "       mulscc  %%g1,%3,%%g1\n"                                         \
1157 "       mulscc  %%g1,%3,%%g1\n"                                         \
1158 "       mulscc  %%g1,%3,%%g1\n"                                         \
1159 "       mulscc  %%g1,%3,%%g1\n"                                         \
1160 "       mulscc  %%g1,%3,%%g1\n"                                         \
1161 "       mulscc  %%g1,%3,%%g1\n"                                         \
1162 "       mulscc  %%g1,%3,%%g1\n"                                         \
1163 "       mulscc  %%g1,%3,%%g1\n"                                         \
1164 "       mulscc  %%g1,%3,%%g1\n"                                         \
1165 "       mulscc  %%g1,%3,%%g1\n"                                         \
1166 "       mulscc  %%g1,%3,%%g1\n"                                         \
1167 "       mulscc  %%g1,%3,%%g1\n"                                         \
1168 "       mulscc  %%g1,%3,%%g1\n"                                         \
1169 "       mulscc  %%g1,%3,%%g1\n"                                         \
1170 "       mulscc  %%g1,%3,%%g1\n"                                         \
1171 "       mulscc  %%g1,%3,%%g1\n"                                         \
1172 "       mulscc  %%g1,%3,%%g1\n"                                         \
1173 "       mulscc  %%g1,%3,%%g1\n"                                         \
1174 "       mulscc  %%g1,%3,%%g1\n"                                         \
1175 "       mulscc  %%g1,%3,%%g1\n"                                         \
1176 "       mulscc  %%g1,0,%%g1\n"                                          \
1177 "       add     %%g1,%%o5,%0\n"                                         \
1178 "       rd      %%y,%1"                                                 \
1179            : "=r" ((USItype) (w1)),                                     \
1180              "=r" ((USItype) (w0))                                      \
1181            : "%rI" ((USItype) (u)),                                     \
1182              "r" ((USItype) (v))                                                \
1183            : "g1", "o5" __AND_CLOBBER_CC)
1184 #define UMUL_TIME 39            /* 39 instructions */
1185 /* It's quite necessary to add this much assembler for the sparc.
1186    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Divide the two-word value (__n1, __n0) by __d on SPARC without a
   hardware divide, giving quotient __q and remainder __r.
   Operand 0 enters holding __n0 and leaves holding __q; operand 1
   enters holding __n1 and leaves holding __r; operand 2 is __d.  %g1 is
   loaded with 32 and counted down once per loop iteration.  Each
   iteration shifts one bit out of operand 0 into operand 1 while
   shifting a quotient bit into operand 0, subtracting __d from operand
   1 along the way.  The closing "xnor %0,0,%0" complements the
   accumulated bits to give the quotient.  Instructions indented by one
   extra space sit in branch delay slots.  %g1 and (via
   __AND_CLOBBER_CC, presumably) the condition codes are clobbered.  */
1187 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1188   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1189 "       mov     32,%%g1\n"                                              \
1190 "       subcc   %1,%2,%%g0\n"                                           \
1191 "1:     bcs     5f\n"                                                   \
1192 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1193 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1194 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1195 "       subcc   %%g1,1,%%g1\n"                                          \
1196 "2:     bne     1b\n"                                                   \
1197 "        subcc  %1,%2,%%g0\n"                                           \
1198 "       bcs     3f\n"                                                   \
1199 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1200 "       b       3f\n"                                                   \
1201 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1202 "4:     sub     %1,%2,%1\n"                                             \
1203 "5:     addxcc  %1,%1,%1\n"                                             \
1204 "       bcc     2b\n"                                                   \
1205 "        subcc  %%g1,1,%%g1\n"                                          \
1206 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1207 "       bne     4b\n"                                                   \
1208 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1209 "       sub     %1,%2,%1\n"                                             \
1210 "3:     xnor    %0,0,%0\n"                                              \
1211 "       ! End of inline udiv_qrnnd"                                     \
1212            : "=&r" ((USItype) (__q)),                                   \
1213              "=&r" ((USItype) (__r))                                    \
1214            : "r" ((USItype) (__d)),                                     \
1215              "1" ((USItype) (__n1)),                                    \
1216              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1217 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1218 #endif /* __sparclite__ */
1219 #endif /* __sparc_v8__ */
1220 #endif /* sparc32 */
1221
1222 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1223     && W_TYPE_SIZE == 64
/* (sh, sl) = (ah, al) + (bh, bl) for 64-bit SPARC.
   addcc adds the low words into sl and sets the condition codes; a
   plain add forms the high-word sum in sh.  "bcs,a,pn %xcc" then tests
   the 64-bit carry: the annulled delay-slot "add %0, 1, %0" increments
   sh only when the branch is taken, i.e. when the low-word add carried.
   sl is earlyclobber because it is written before the high-word inputs
   are read.  */
1224 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1225   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1226            "add %r2,%3,%0\n\t"                                          \
1227            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1228            "add %0, 1, %0\n"                                            \
1229            "1:"                                                         \
1230            : "=r" ((UDItype)(sh)),                                      \
1231              "=&r" ((UDItype)(sl))                                      \
1232            : "%rJ" ((UDItype)(ah)),                                     \
1233              "rI" ((UDItype)(bh)),                                      \
1234              "%rJ" ((UDItype)(al)),                                     \
1235              "rI" ((UDItype)(bl))                                       \
1236            __CLOBBER_CC)
1237
/* (sh, sl) = (ah, al) - (bh, bl) for 64-bit SPARC.
   subcc subtracts the low words into sl and sets the condition codes; a
   plain sub forms the high-word difference in sh.  "bcs,a,pn %xcc" then
   tests the 64-bit borrow: the annulled delay-slot "sub %0, 1, %0"
   decrements sh only when the branch is taken, i.e. when the low-word
   subtraction borrowed.  sl is earlyclobber because it is written
   before the high-word inputs are read.  */
1238 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1239   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1240            "sub %r2,%3,%0\n\t"                                          \
1241            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1242            "sub %0, 1, %0\n\t"                                          \
1243            "1:"                                                         \
1244            : "=r" ((UDItype)(sh)),                                      \
1245              "=&r" ((UDItype)(sl))                                      \
1246            : "rJ" ((UDItype)(ah)),                                      \
1247              "rI" ((UDItype)(bh)),                                      \
1248              "rJ" ((UDItype)(al)),                                      \
1249              "rI" ((UDItype)(bl))                                       \
1250            __CLOBBER_CC)
1251
/* (wh, wl) = u * v for 64-bit SPARC, assembled from 64-bit mulx
   products of the 32-bit halves of the operands.
   Operand numbering: %0 = wh, %1 = wl, %2..%5 = tmp1..tmp4, %6 = u,
   %7 = v.  "srl x,0" isolates a low 32-bit half and "srlx x,32" a high
   half.  The low result word starts as low(v) * u and later has
   (low(u) * high(v)) << 32 added to it.  The high word is
   high(u) * high(v) plus the upper 32 bits of the summed cross
   products, plus 2^32 (built by sethi followed by doubling) when the
   second addcc of the cross products carried; movcc zeroes that term
   otherwise.  The four temporaries and wl are earlyclobber because the
   inputs are read throughout the sequence.
   NOTE(review): UMUL_TIME and UDIV_TIME are presumably cost estimates
   in the same units as elsewhere in this file; no udiv_qrnnd is defined
   in this section, so UDIV_TIME presumably rates the generic C
   fallback -- confirm.  */
1252 #define umul_ppmm(wh, wl, u, v)                                         \
1253   do {                                                                  \
1254           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1255           __asm__ __volatile__ (                                        \
1256                    "srl %7,0,%3\n\t"                                    \
1257                    "mulx %3,%6,%1\n\t"                                  \
1258                    "srlx %6,32,%2\n\t"                                  \
1259                    "mulx %2,%3,%4\n\t"                                  \
1260                    "sllx %4,32,%5\n\t"                                  \
1261                    "srl %6,0,%3\n\t"                                    \
1262                    "sub %1,%5,%5\n\t"                                   \
1263                    "srlx %5,32,%5\n\t"                                  \
1264                    "addcc %4,%5,%4\n\t"                                 \
1265                    "srlx %7,32,%5\n\t"                                  \
1266                    "mulx %3,%5,%3\n\t"                                  \
1267                    "mulx %2,%5,%5\n\t"                                  \
1268                    "sethi %%hi(0x80000000),%2\n\t"                      \
1269                    "addcc %4,%3,%4\n\t"                                 \
1270                    "srlx %4,32,%4\n\t"                                  \
1271                    "add %2,%2,%2\n\t"                                   \
1272                    "movcc %%xcc,%%g0,%2\n\t"                            \
1273                    "addcc %5,%4,%5\n\t"                                 \
1274                    "sllx %3,32,%3\n\t"                                  \
1275                    "add %1,%3,%1\n\t"                                   \
1276                    "add %5,%2,%0"                                       \
1277            : "=r" ((UDItype)(wh)),                                      \
1278              "=&r" ((UDItype)(wl)),                                     \
1279              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1280            : "r" ((UDItype)(u)),                                        \
1281              "r" ((UDItype)(v))                                         \
1282            __CLOBBER_CC);                                               \
1283   } while (0)
1284 #define UMUL_TIME 96
1285 #define UDIV_TIME 230
1286 #endif /* sparc64 */
1287
1288 #if defined (__vax__) && W_TYPE_SIZE == 32
/* (sh, sl) = (ah, al) + (bh, bl) for VAX.
   addl2 adds bl into operand 1 (sl, preloaded with al); adwc then adds
   bh into operand 0 (sh, preloaded with ah).  sl is earlyclobber
   because it is written before bh is read.
   NOTE(review): presumably adwc includes the carry left by addl2 --
   confirm against the VAX architecture reference.  */
1289 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1290   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1291            : "=g" ((USItype) (sh)),                                     \
1292              "=&g" ((USItype) (sl))                                     \
1293            : "%0" ((USItype) (ah)),                                     \
1294              "g" ((USItype) (bh)),                                      \
1295              "%1" ((USItype) (al)),                                     \
1296              "g" ((USItype) (bl)))
/* (sh, sl) = (ah, al) - (bh, bl) for VAX.
   subl2 subtracts bl from operand 1 (sl, preloaded with al); sbwc then
   subtracts bh from operand 0 (sh, preloaded with ah).  sl is
   earlyclobber because it is written before bh is read.
   NOTE(review): presumably sbwc includes the borrow left by subl2 --
   confirm against the VAX architecture reference.  */
1297 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1298   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1299            : "=g" ((USItype) (sh)),                                     \
1300              "=&g" ((USItype) (sl))                                     \
1301            : "0" ((USItype) (ah)),                                      \
1302              "g" ((USItype) (bh)),                                      \
1303              "1" ((USItype) (al)),                                      \
1304              "g" ((USItype) (bl)))
/* (xh, xl) = m0 * m1 for VAX, built on the "emul" instruction.
   emul multiplies the two operands, adds the literal 0, and stores a
   double-word result in __xx.__ll; the union then splits it into the
   __l and __h words.  The trailing adjustment adds m1 to the high word
   when m0 has its top bit set, and m0 when m1 has its top bit set.
   NOTE(review): presumably emul is a signed multiply and this
   adjustment converts its result to the unsigned product -- confirm
   against the VAX architecture reference.
   m0 and m1 are each evaluated once, into __m0 and __m1.  */
1305 #define umul_ppmm(xh, xl, m0, m1) \
1306   do {                                                                  \
1307     union {                                                             \
1308         UDItype __ll;                                                   \
1309         struct {USItype __l, __h;} __i;                                 \
1310       } __xx;                                                           \
1311     USItype __m0 = (m0), __m1 = (m1);                                   \
1312     __asm__ ("emul %1,%2,$0,%0"                                         \
1313              : "=r" (__xx.__ll)                                         \
1314              : "g" (__m0),                                              \
1315                "g" (__m1));                                             \
1316     (xh) = __xx.__i.__h;                                                \
1317     (xl) = __xx.__i.__l;                                                \
1318     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1319              + (((SItype) __m1 >> 31) & __m0));                         \
1320   } while (0)
/* Signed division of the two-word value (n1, n0) by d for VAX, giving
   quotient q and remainder r.
   The two words are packed into a DItype through the union (n0 in __l,
   n1 in __h) and handed to "ediv" together with d; the instruction
   writes q and r directly.  */
1321 #define sdiv_qrnnd(q, r, n1, n0, d) \
1322   do {                                                                  \
1323     union {DItype __ll;                                                 \
1324            struct {SItype __l, __h;} __i;                               \
1325           } __xx;                                                       \
1326     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1327     __asm__ ("ediv %3,%2,%0,%1"                                         \
1328              : "=g" (q), "=g" (r)                                       \
1329              : "g" (__xx.__ll), "g" (d));                               \
1330   } while (0)
1331 #endif /* __vax__ */
1332
1333 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1334 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1335    to expand builtin functions depending on what configuration features
1336    are available.  This avoids library calls when the operation can be
1337    performed in-line.  */
/* (w1, w0) = u * v for Xtensa.  The double-word product comes from
   __builtin_umulsidi3 and is split into its high and low words through
   a DWunion temporary.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do                                                                    \
    {                                                                   \
      DWunion __product;                                                \
                                                                        \
      __product.ll = __builtin_umulsidi3 (u, v);                        \
      w1 = __product.s.high;                                            \
      w0 = __product.s.low;                                             \
    }                                                                   \
  while (0)
/* Thin wrappers that forward straight to compiler builtins:
   __umulsidi3 yields the value of __builtin_umulsidi3 (u, v);
   count_leading_zeros stores __builtin_clz (X) into COUNT;
   count_trailing_zeros stores __builtin_ctz (X) into COUNT.
   No COUNT_LEADING_ZEROS_0 is defined in this section.  */
1345 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1346 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1347 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1348 #endif /* __xtensa__ */
1349
1350 #if defined xstormy16
/* Helper taking and returning a UHItype.
   NOTE(review): the count_leading_zeros macro that follows calls
   __clzhi2 rather than this function -- confirm whether this
   declaration is still needed.  */
1351 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* Count the leading zero bits of X into COUNT, 16 bits at a time.

   X is scanned from its most significant 16-bit chunk downwards.  Each
   chunk is passed to __clzhi2 and the result is added to COUNT; the
   scan stops at the first chunk for which __clzhi2 does not return 16
   (presumably its result for an all-zero chunk).  If every chunk
   reports 16 the total is W_TYPE_SIZE, which COUNT_LEADING_ZEROS_0
   advertises as the result for X == 0.

   The locals use reserved-style names so that they cannot shadow an
   identifier such as `size' or `c' appearing in the caller's X or
   COUNT expression.  X is evaluated once per chunk examined.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __clz_size;                                               \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __clz_size = W_TYPE_SIZE;                       \
           __clz_size;                                                  \
           __clz_size -= 16)                                            \
        {                                                               \
          UHItype __clz_c;                                              \
                                                                        \
          __clz_c = __clzhi2 ((x) >> (__clz_size - 16));                \
          (count) += __clz_c;                                           \
          if (__clz_c != 16)                                            \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1370 #endif
1371
1372 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* (sh, sl) = (ah, al) + (bh, bl) for Z8000 (16-bit words).
   "add" adds bl into operand 1 (sl, preloaded with al); "adc" then adds
   bh into operand 0 (sh, preloaded with ah).  sl is earlyclobber
   because it is written before bh is read.
   NOTE(review): presumably adc includes the carry left by add --
   confirm against the Z8000 manual.  */
1373 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1374   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1375            : "=r" ((unsigned int)(sh)),                                 \
1376              "=&r" ((unsigned int)(sl))                                 \
1377            : "%0" ((unsigned int)(ah)),                                 \
1378              "r" ((unsigned int)(bh)),                                  \
1379              "%1" ((unsigned int)(al)),                                 \
1380              "rQR" ((unsigned int)(bl)))
/* (sh, sl) = (ah, al) - (bh, bl) for Z8000 (16-bit words).
   "sub" subtracts bl from operand 1 (sl, preloaded with al); "sbc" then
   subtracts bh from operand 0 (sh, preloaded with ah).  sl is
   earlyclobber because it is written before bh is read.
   NOTE(review): presumably sbc includes the borrow left by sub --
   confirm against the Z8000 manual.  */
1381 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1382   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1383            : "=r" ((unsigned int)(sh)),                                 \
1384              "=&r" ((unsigned int)(sl))                                 \
1385            : "0" ((unsigned int)(ah)),                                  \
1386              "r" ((unsigned int)(bh)),                                  \
1387              "1" ((unsigned int)(al)),                                  \
1388              "rQR" ((unsigned int)(bl)))
/* (xh, xl) = m0 * m1 for Z8000 (16-bit words), built on "mult".
   __m0 is preloaded into the __l half of the union (matching constraint
   "%1") and __m1 is the other multiplicand; the instruction leaves its
   result in the __h/__l pair, which is then copied to xh and xl.  The
   trailing adjustment adds m1 to the high word when m0 has bit 15 set,
   and m0 when m1 has bit 15 set.
   NOTE(review): presumably mult is a signed multiply and this
   adjustment converts its result to the unsigned product -- confirm
   against the Z8000 manual.
   m0 and m1 are each evaluated once, into __m0 and __m1.  */
1389 #define umul_ppmm(xh, xl, m0, m1) \
1390   do {                                                                  \
1391     union {long int __ll;                                               \
1392            struct {unsigned int __h, __l;} __i;                         \
1393           } __xx;                                                       \
1394     unsigned int __m0 = (m0), __m1 = (m1);                              \
1395     __asm__ ("mult      %S0,%H3"                                        \
1396              : "=r" (__xx.__i.__h),                                     \
1397                "=r" (__xx.__i.__l)                                      \
1398              : "%1" (__m0),                                             \
1399                "rQR" (__m1));                                           \
1400     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1401     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1402              + (((signed int) __m1 >> 15) & __m0));                     \
1403   } while (0)
1404 #endif /* __z8000__ */
1405
1406 #endif /* __GNUC__ */
1407
1408 /* If this machine has no inline assembler, use C macros.  */
1409
1410 #if !defined (add_ssaaaa)
/* Portable double-word addition: (sh, sl) = (ah, al) + (bh, bl).
   The low words are summed first.  If that sum wrapped around it is
   smaller than AL, and the comparison result (0 or 1) is the carry
   folded into the high-word sum.  AL is evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do                                                                    \
    {                                                                   \
      UWtype __lo_sum = (al) + (bl);                                    \
                                                                        \
      (sh) = (ah) + (bh) + (__lo_sum < (al));                           \
      (sl) = __lo_sum;                                                  \
    }                                                                   \
  while (0)
1418 #endif
1419
1420 #if !defined (sub_ddmmss)
/* Portable double-word subtraction: (sh, sl) = (ah, al) - (bh, bl).
   The low words are subtracted first.  If that difference wrapped
   around it is larger than AL, and the comparison result (0 or 1) is
   the borrow taken from the high-word difference.  AL is evaluated
   twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do                                                                    \
    {                                                                   \
      UWtype __lo_diff = (al) - (bl);                                   \
                                                                        \
      (sh) = (ah) - (bh) - (__lo_diff > (al));                          \
      (sl) = __lo_diff;                                                 \
    }                                                                   \
  while (0)
1428 #endif
1429
1430 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1431    smul_ppmm.  */
1432 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* Unsigned (w1, w0) = u * v derived from the signed smul_ppmm.
   smul_ppmm supplies the low word directly.  Its high word is corrected
   by adding V when U has its top bit set and adding U when V has its
   top bit set; each condition is turned into an all-ones or all-zero
   mask by negating the shifted-down top bit.  U and V are each
   evaluated once.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do                                                                    \
    {                                                                   \
      UWtype __um = (u), __vm = (v);                                    \
      UWtype __u_mask = -(__um >> (W_TYPE_SIZE - 1));                   \
      UWtype __v_mask = -(__vm >> (W_TYPE_SIZE - 1));                   \
      UWtype __signed_hi;                                               \
                                                                        \
      smul_ppmm (__signed_hi, w0, __um, __vm);                          \
      (w1) = __signed_hi + (__u_mask & __vm) + (__v_mask & __um);       \
    }                                                                   \
  while (0)
1441 #endif
1442
1443 /* If we still don't have umul_ppmm, define it using plain C.  */
1444 #if !defined (umul_ppmm)
/* Portable (w1, w0) = u * v using four half-word partial products.
   U and V are split with __ll_lowpart/__ll_highpart, the four products
   are formed in UWtype, and the two cross products are summed into one
   middle term.  A wrap-around of that sum is detected by comparing it
   with the addend and is propagated into the high product as __ll_B.
   U and V are each evaluated twice.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do                                                                    \
    {                                                                   \
      UHWtype __u_lo = __ll_lowpart (u), __u_hi = __ll_highpart (u);    \
      UHWtype __v_lo = __ll_lowpart (v), __v_hi = __ll_highpart (v);    \
      UWtype __p_ll = (UWtype) __u_lo * __v_lo;                         \
      UWtype __p_mid = (UWtype) __u_lo * __v_hi;                        \
      UWtype __p_hl = (UWtype) __u_hi * __v_lo;                         \
      UWtype __p_hh = (UWtype) __u_hi * __v_hi;                         \
                                                                        \
      /* Adding the high half of the low product cannot carry.  */      \
      __p_mid += __ll_highpart (__p_ll);                                \
      /* Adding the other cross product can.  */                        \
      __p_mid += __p_hl;                                                \
      if (__p_mid < __p_hl)                                             \
        __p_hh += __ll_B;       /* carry lands one half-word up */      \
                                                                        \
      (w1) = __p_hh + __ll_highpart (__p_mid);                          \
      (w0) = __ll_lowpart (__p_mid) * __ll_B + __ll_lowpart (__p_ll);   \
    }                                                                   \
  while (0)
1468 #endif
1469
1470 #if !defined (__umulsidi3)
/* Double-word product of U and V as a single value: umul_ppmm fills the
   high and low words of a DWunion temporary, and the statement
   expression yields its combined `ll' member.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __product;                                                  \
    umul_ppmm (__product.s.high, __product.s.low, u, v);                \
    __product.ll;                                                       \
  })
1475 #endif
1476
1477 /* Define this unconditionally, so it can be used for debugging.  */
/* Portable division of the two-word value (n1, n0) by d, giving
   quotient q and remainder r, computed one half-word quotient digit at
   a time.

   Each digit is first estimated by dividing the running remainder by
   the high half of d, then corrected downwards at most twice if the
   estimate times the low half of d exceeds what is left.  The second
   correction is attempted only when adding d did not wrap around.
   The macro arguments are evaluated more than once.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do                                                                    \
    {                                                                   \
      UWtype __d_hi = __ll_highpart (d);                                \
      UWtype __d_lo = __ll_lowpart (d);                                 \
      UWtype __q_hi, __q_lo, __rem_hi, __rem_lo, __prod;                \
                                                                        \
      /* Upper quotient digit.  */                                      \
      __q_hi = (n1) / __d_hi;                                           \
      __rem_hi = (n1) % __d_hi;                                         \
      __prod = (UWtype) __q_hi * __d_lo;                                \
      __rem_hi = __rem_hi * __ll_B | __ll_highpart (n0);                \
      if (__rem_hi < __prod)                                            \
        {                                                               \
          __q_hi--;                                                     \
          __rem_hi += (d);                                              \
          /* Retry only if the addition above did not carry.  */        \
          if (__rem_hi >= (d) && __rem_hi < __prod)                     \
            {                                                           \
              __q_hi--;                                                 \
              __rem_hi += (d);                                          \
            }                                                           \
        }                                                               \
      __rem_hi -= __prod;                                               \
                                                                        \
      /* Lower quotient digit.  */                                      \
      __q_lo = __rem_hi / __d_hi;                                       \
      __rem_lo = __rem_hi % __d_hi;                                     \
      __prod = (UWtype) __q_lo * __d_lo;                                \
      __rem_lo = __rem_lo * __ll_B | __ll_lowpart (n0);                 \
      if (__rem_lo < __prod)                                            \
        {                                                               \
          __q_lo--;                                                     \
          __rem_lo += (d);                                              \
          if (__rem_lo >= (d) && __rem_lo < __prod)                     \
            {                                                           \
              __q_lo--;                                                 \
              __rem_lo += (d);                                          \
            }                                                           \
        }                                                               \
      __rem_lo -= __prod;                                               \
                                                                        \
      (q) = (UWtype) __q_hi * __ll_B | __q_lo;                          \
      (r) = __rem_lo;                                                   \
    }                                                                   \
  while (0)
1514
1515 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1516    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
1517 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
/* Unsigned two-word-by-one-word division routed through __udiv_w_sdiv:
   the call's return value becomes the quotient Q, and the value it stores
   through its first argument becomes the remainder R.  The temporary is
   declared as UWtype so that its address has the same word type as the
   other operands handed to __udiv_w_sdiv, whatever the width of UWtype.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
1524 #endif
1525
/* Nothing above supplied a udiv_qrnnd for this processor: alias it to the
   C implementation __udiv_qrnnd_c and set UDIV_NEEDS_NORMALIZATION to 1
   for that fallback.  */
#ifndef udiv_qrnnd
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1531
#if !defined (count_leading_zeros)
/* C fallback for count_leading_zeros.  It first chooses a shift amount
   that brings the most significant nonzero part of X down into the index
   range of __clz_tab, then combines the table entry with that shift.
   For words of at most 32 bits the shift is one of 0, 1, 2 or 3 times
   __BITS4; for wider words it is found by stepping down 8 bits at a time.
   With X == 0 the shift ends up 0 and the result is derived from
   __clz_tab[0].  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_x = (x);                                               \
    UWtype __clz_shift;                                                 \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        /* Select the quarter of the word with the top set bit.  */     \
        if (__clz_x < ((UWtype) 1 << (2 * __BITS4)))                    \
          __clz_shift = (__clz_x < ((UWtype) 1 << __BITS4)              \
                         ? 0 : __BITS4);                                \
        else                                                            \
          __clz_shift = (__clz_x < ((UWtype) 1 << (3 * __BITS4))        \
                         ? 2 * __BITS4 : 3 * __BITS4);                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Walk down a byte at a time to the highest nonzero byte.  */  \
        __clz_shift = W_TYPE_SIZE - 8;                                  \
        while (__clz_shift > 0                                          \
               && ((__clz_x >> __clz_shift) & 0xff) == 0)               \
          __clz_shift -= 8;                                             \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (__clz_tab[__clz_x >> __clz_shift] + __clz_shift);      \
  } while (0)
/* Value the macro above is declared to yield for a zero argument.  */
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1555
#if !defined (count_trailing_zeros)
/* Generic count_trailing_zeros expressed through count_leading_zeros
   (whichever definition of the latter is in effect).  X & -X keeps only
   the lowest set bit of X; the leading-zero count of that single bit is
   then turned into the bit's index counted from the low end of the
   word.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_word = (x);                                            \
    UWtype __ctz_lowbit = __ctz_word & -__ctz_word;                     \
    UWtype __ctz_lz;                                                    \
                                                                        \
    count_leading_zeros (__ctz_lz, __ctz_lowbit);                       \
    (count) = (W_TYPE_SIZE - 1) - __ctz_lz;                             \
  } while (0)
#endif
1567
/* Default UDIV_NEEDS_NORMALIZATION to 0 when nothing earlier has
   defined it.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif