stdlib/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, see
  29    <http://www.gnu.org/licenses/>.  */
  30
  31 /* You have to define the following before including this file:
  32
  33    UWtype -- An unsigned type, default type for operations (typically a "word")
  34    UHWtype -- An unsigned type, at least half the size of UWtype.
   35    UDWtype -- An unsigned type, at least twice as large as UWtype
  36    W_TYPE_SIZE -- size in bits of UWtype
  37
  38    UQItype -- Unsigned 8 bit type.
  39    SItype, USItype -- Signed and unsigned 32 bit types.
  40    DItype, UDItype -- Signed and unsigned 64 bit types.
  41
  42    On a 32 bit machine UWtype should typically be USItype;
  43    on a 64 bit machine, UWtype should typically be UDItype.  */
  44
   45 #define __BITS4 (W_TYPE_SIZE / 4)  /* One quarter of W_TYPE_SIZE.  */
   46 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))  /* 2^(W_TYPE_SIZE/2) as a UWtype.  */
   47 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))  /* Low W_TYPE_SIZE/2 bits of T.  */
   48 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))  /* T shifted right by W_TYPE_SIZE/2 bits.  */
  49
   50 #ifndef W_TYPE_SIZE
      /* Fallback used when the includer did not define W_TYPE_SIZE: a 32-bit
         word, with USItype as both the word and half-word type and UDItype
         as the double-word type.  */
   51 #define W_TYPE_SIZE     32
   52 #define UWtype          USItype
   53 #define UHWtype         USItype
   54 #define UDWtype         UDItype
   55 #endif /* !W_TYPE_SIZE */
  56
   57 /* Used in glibc only.  */
   58 #ifndef attribute_hidden
   59 #define attribute_hidden  /* Expands to nothing unless already defined.  */
   60 #endif
   61
      /* 256-entry byte table, only declared here (the definition lives
         elsewhere).  It is indexed by a byte value in the Alpha
         count_leading_zeros fallback further down.  */
   62 extern const UQItype __clz_tab[256] attribute_hidden;
  63
  64 /* Define auxiliary asm macros.
  65
  66    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  67    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  68    word product in HIGH_PROD and LOW_PROD.
  69
  70    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  71    UDWtype product.  This is just a variant of umul_ppmm.
  72
  73    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  74    denominator) divides a UDWtype, composed by the UWtype integers
  75    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  76    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  77    than DENOMINATOR for correct operation.  If, in addition, the most
  78    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  79    UDIV_NEEDS_NORMALIZATION is defined to 1.
  80
  81    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  82    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  83    is rounded towards 0.
  84
  85    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  86    msb to the first nonzero bit in the UWtype X.  This is the number of
  87    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  88    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  89
  90    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  91    from the least significant end.
  92
   93    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   94    high_addend_2, low_addend_2) adds two two-word UWtype integers,
   95    composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and
   96    LOW_ADDEND_2 respectively.  The result is placed in HIGH_SUM and
   97    LOW_SUM.  Overflow (i.e. carry out) is not stored anywhere, and is lost.
  98
   99    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
  100    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
  101    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
  102    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
  103    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
  104    and is lost.
 105
 106    If any of these macros are left undefined for a particular CPU,
 107    C macros are used.  */
 108
 109 /* The CPUs come in alphabetical order below.
 110
 111    Please add support for more CPUs here, or improve the current support
 112    for the CPUs below!
 113    (E.g. WE32100, IBM360.)  */
 114
 115 #if defined (__GNUC__) && !defined (NO_ASM)
 116
  117 /* We sometimes need to clobber "cc" with gcc2, but that would not be
  118    understood by gcc1.  Use cpp to avoid major code duplication.  */
  119 #if __GNUC__ < 2
      /* Both helpers expand to nothing, so the asm statements that use them
         end up with no "cc" clobber.  */
  120 #define __CLOBBER_CC
  121 #define __AND_CLOBBER_CC
  122 #else /* __GNUC__ >= 2 */
      /* __CLOBBER_CC opens a clobber list (leading colon); __AND_CLOBBER_CC
         appends "cc" to a clobber list that already has an entry (leading
         comma).  */
  123 #define __CLOBBER_CC : "cc"
  124 #define __AND_CLOBBER_CC , "cc"
  125 #endif /* __GNUC__ < 2 */
 126
  127 #if defined (__alpha) && W_TYPE_SIZE == 64
      /* Alpha with a 64-bit word.  umul_ppmm copies both factors into locals
         (so each argument is evaluated once), takes the high product word
         from __builtin_alpha_umulh and the low word from a plain multiply.  */
  128 #define umul_ppmm(ph, pl, m0, m1) \
  129   do {                                                                  \
  130     UDItype __m0 = (m0), __m1 = (m1);                                   \
  131     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
  132     (pl) = __m0 * __m1;                                                 \
  133   } while (0)
  134 #define UMUL_TIME 46
  135 #ifndef LONGLONG_STANDALONE
      /* Division is delegated to the external function __udiv_qrnnd declared
         below: its return value becomes the quotient and the remainder is
         read back through the pointer passed as its first argument.  */
  136 #define udiv_qrnnd(q, r, n1, n0, d) \
  137   do { UDItype __r;                                                     \
  138     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
  139     (r) = __r;                                                          \
  140   } while (0)
  141 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
  142 #define UDIV_TIME 220
  143 #endif /* LONGLONG_STANDALONE */
  144 #ifdef __alpha_cix__
      /* With __alpha_cix__ defined, the compiler builtins are used directly.  */
  145 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
  146 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
  147 #define COUNT_LEADING_ZEROS_0 64
  148 #else /* !__alpha_cix__ */
      /* Two-level lookup: the cmpbge result (inverted) is run through
         __clz_tab to pick a byte index __a, extbl extracts that byte of X,
         and __clz_tab gives the bit position inside it.  The count is 64
         minus the combined position.
         NOTE(review): depends on the semantics of __builtin_alpha_cmpbge
         and __builtin_alpha_extbl, which are not visible here.  */
  149 #define count_leading_zeros(COUNT,X) \
  150   do {                                                                  \
  151     UDItype __xr = (X), __t, __a;                                       \
  152     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  153     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
  154     __t = __builtin_alpha_extbl (__xr, __a);                            \
  155     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  156   } while (0)
      /* Same two-level scheme from the low end.  "~__t & -~__t" keeps only
         the lowest set bit of the inverted cmpbge mask; the 0xCC / 0xF0 /
         0xAA tests turn that one-hot value into a byte index.  The selected
         byte is then reduced to its lowest set bit ("__t &= -__t") and the
         same three tests give the bit index, added to the byte index * 8.  */
  157 #define count_trailing_zeros(COUNT,X) \
  158   do {                                                                  \
  159     UDItype __xr = (X), __t, __a;                                       \
  160     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  161     __t = ~__t & -~__t;                                                 \
  162     __a = ((__t & 0xCC) != 0) * 2;                                      \
  163     __a += ((__t & 0xF0) != 0) * 4;                                     \
  164     __a += ((__t & 0xAA) != 0);                                         \
  165     __t = __builtin_alpha_extbl (__xr, __a);                            \
  166     __a <<= 3;                                                          \
  167     __t &= -__t;                                                        \
  168     __a += ((__t & 0xCC) != 0) * 2;                                     \
  169     __a += ((__t & 0xF0) != 0) * 4;                                     \
  170     __a += ((__t & 0xAA) != 0);                                         \
  171     (COUNT) = __a;                                                      \
  172   } while (0)
  173 #endif /* __alpha_cix__ */
  174 #endif /* __alpha */
 175
 176 #if defined (__arc__) && W_TYPE_SIZE == 32
 177 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 178   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 179            : "=r" ((USItype) (sh)),                                     \
 180              "=&r" ((USItype) (sl))                                     \
 181            : "%r" ((USItype) (ah)),                                     \
 182              "rIJ" ((USItype) (bh)),                                    \
 183              "%r" ((USItype) (al)),                                     \
 184              "rIJ" ((USItype) (bl)))
 185 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 186   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 187            : "=r" ((USItype) (sh)),                                     \
 188              "=&r" ((USItype) (sl))                                     \
 189            : "r" ((USItype) (ah)),                                      \
 190              "rIJ" ((USItype) (bh)),                                    \
 191              "r" ((USItype) (al)),                                      \
 192              "rIJ" ((USItype) (bl)))
 193 /* Call libgcc routine.  */
 194 #define umul_ppmm(w1, w0, u, v) \
 195 do {                                                                    \
 196   DWunion __w;                                                          \
 197   __w.ll = __umulsidi3 (u, v);                                          \
 198   w1 = __w.s.high;                                                      \
 199   w0 = __w.s.low;                                                       \
 200 } while (0)
 201 #define __umulsidi3 __umulsidi3
 202 UDItype __umulsidi3 (USItype, USItype);
 203 #endif
 204
 205 #if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
 206 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 207   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
 208            : "=r" ((USItype) (sh)),                                     \
 209              "=&r" ((USItype) (sl))                                     \
 210            : "%r" ((USItype) (ah)),                                     \
 211              "rI" ((USItype) (bh)),                                     \
 212              "%r" ((USItype) (al)),                                     \
 213              "rI" ((USItype) (bl)) __CLOBBER_CC)
 214 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 215   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
 216            : "=r" ((USItype) (sh)),                                     \
 217              "=&r" ((USItype) (sl))                                     \
 218            : "r" ((USItype) (ah)),                                      \
 219              "rI" ((USItype) (bh)),                                     \
 220              "r" ((USItype) (al)),                                      \
 221              "rI" ((USItype) (bl)) __CLOBBER_CC)
 222 #define umul_ppmm(xh, xl, a, b) \
 223 {register USItype __t0, __t1, __t2;                                     \
 224   __asm__ ("%@ Inlined umul_ppmm\n"                                     \
 225            "    mov     %2, %5, lsr #16\n"                              \
 226            "    mov     %0, %6, lsr #16\n"                              \
 227            "    bic     %3, %5, %2, lsl #16\n"                          \
 228            "    bic     %4, %6, %0, lsl #16\n"                          \
 229            "    mul     %1, %3, %4\n"                                   \
 230            "    mul     %4, %2, %4\n"                                   \
 231            "    mul     %3, %0, %3\n"                                   \
 232            "    mul     %0, %2, %0\n"                                   \
 233            "    adds    %3, %4, %3\n"                                   \
 234            "    addcs   %0, %0, #65536\n"                               \
 235            "    adds    %1, %1, %3, lsl #16\n"                          \
 236            "    adc     %0, %0, %3, lsr #16"                            \
 237            : "=&r" ((USItype) (xh)),                                    \
 238              "=r" ((USItype) (xl)),                                     \
 239              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
 240            : "r" ((USItype) (a)),                                       \
 241              "r" ((USItype) (b)) __CLOBBER_CC );}
 242 #define UMUL_TIME 20
 243 #define UDIV_TIME 100
 244 #endif /* __arm__ */
 245
  246 #if defined(__arm__)
  247 /* Let gcc decide how best to implement count_leading_zeros.  */
      /* NOTE(review): unlike the block above, this one is not conditional on
         W_TYPE_SIZE == 32 or on !__thumb__; __builtin_clz takes an unsigned
         int and COUNT_LEADING_ZEROS_0 is fixed at 32.  */
  248 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
  249 #define COUNT_LEADING_ZEROS_0 32
  250 #endif /* __arm__ */
 251
  252 #if defined (__CRIS__) && __CRIS_arch_version >= 3
      /* CRIS: count_leading_zeros maps to __builtin_clz from arch version 3
         on; count_trailing_zeros maps to __builtin_ctz only from arch
         version 8 on.  */
  253 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
  254 #if __CRIS_arch_version >= 8
  255 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
  256 #endif /* __CRIS_arch_version >= 8 */
  257 #endif /* __CRIS__ */
 258
  259 #if defined (__hppa) && W_TYPE_SIZE == 32
      /* Double-word add in two instructions: "add" on the low words (%4,%5
         into %1), then "addc" on the high words (%2,%3 into %0).  The low
         result is early-clobber because it is written first.  */
  260 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  261   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
  262            : "=r" ((USItype) (sh)),                                     \
  263              "=&r" ((USItype) (sl))                                     \
  264            : "%rM" ((USItype) (ah)),                                    \
  265              "rM" ((USItype) (bh)),                                     \
  266              "%rM" ((USItype) (al)),                                    \
  267              "rM" ((USItype) (bl)))
      /* Double-word subtract: "sub" on the low words, then "subb" on the
         high words.  */
  268 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  269   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
  270            : "=r" ((USItype) (sh)),                                     \
  271              "=&r" ((USItype) (sl))                                     \
  272            : "rM" ((USItype) (ah)),                                     \
  273              "rM" ((USItype) (bh)),                                     \
  274              "rM" ((USItype) (al)),                                     \
  275              "rM" ((USItype) (bl)))
  276 #if defined (_PA_RISC1_1)
      /* Only with _PA_RISC1_1: "xmpyu" produces the 64-bit product in a
         single "x"-constrained operand; the union overlays it with two
         32-bit words, __w1 declared first and read back as the high word.  */
  277 #define umul_ppmm(w1, w0, u, v) \
  278   do {                                                                  \
  279     union                                                               \
  280       {                                                                 \
  281         UDItype __f;                                                    \
  282         struct {USItype __w1, __w0;} __w1w0;                            \
  283       } __t;                                                            \
  284     __asm__ ("xmpyu %1,%2,%0"                                           \
  285              : "=x" (__t.__f)                                           \
  286              : "x" ((USItype) (u)),                                     \
  287                "x" ((USItype) (v)));                                    \
  288     (w1) = __t.__w1w0.__w1;                                             \
  289     (w0) = __t.__w1w0.__w0;                                             \
  290      } while (0)
  291 #define UMUL_TIME 8
  292 #else /* !_PA_RISC1_1 */
  293 #define UMUL_TIME 30
  294 #endif /* _PA_RISC1_1 */
  295 #define UDIV_TIME 40
      /* Binary search for the highest set bit, as described by the comments
         inside the asm text.  __tmp starts out holding x (the "1" constraint
         ties the input to that output) and is narrowed step by step; count
         starts at 1 and accumulates 16, 8, 4 and 2 before the final
         single-bit adjustment.  */
  296 #define count_leading_zeros(count, x) \
  297   do {                                                                  \
  298     USItype __tmp;                                                      \
  299     __asm__ (                                                           \
  300        "ldi             1,%0\n"                                         \
  301 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
  302 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
  303 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
  304 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
  305 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
  306 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
  307 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
  308 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
  309 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
  310 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
  311 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
  312 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
  313 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
  314 "       sub             %0,%1,%0                ; Subtract it.\n"       \
  315         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  316   } while (0)
  317 #endif /* __hppa */
 318
  319 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
  320 #if !defined (__zarch__)
      /* Without __zarch__: a DItype operand is overlaid with two 32-bit
         words (__h declared first, then __l).  "lr" loads m0 into the %N0
         half of that operand and "mr" multiplies by m1; the halves are then
         read back as the high and low product words.  */
  321 #define smul_ppmm(xh, xl, m0, m1) \
  322   do {                                                                  \
  323     union {DItype __ll;                                                 \
  324            struct {USItype __h, __l;} __i;                              \
  325           } __x;                                                        \
  326     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
  327              : "=&r" (__x.__ll)                                         \
  328              : "r" (m0), "r" (m1));                                     \
  329     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
  330   } while (0)
      /* The dividend n1:n0 is assembled in the same kind of union and passed
         in and out of "dr" through one tied operand; afterwards the __l half
         is taken as the quotient and the __h half as the remainder.  */
  331 #define sdiv_qrnnd(q, r, n1, n0, d) \
  332   do {                                                                  \
  333     union {DItype __ll;                                                 \
  334            struct {USItype __h, __l;} __i;                              \
  335           } __x;                                                        \
  336     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
  337     __asm__ ("dr %0,%2"                                                 \
  338              : "=r" (__x.__ll)                                          \
  339              : "0" (__x.__ll), "r" (d));                                \
  340     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
  341   } while (0)
  342 #else /* __zarch__ */
      /* With __zarch__: the operands are pinned to registers 0 and 1 with
         explicit register variables, and the asm names %r0 directly.  m0 is
         placed in register 1; the results are read as register 0 = high
         word and register 1 = low word.  */
  343 #define smul_ppmm(xh, xl, m0, m1) \
  344   do {                                                                  \
  345     register SItype __r0 __asm__ ("0");                                 \
  346     register SItype __r1 __asm__ ("1") = (m0);                          \
  347                                                                         \
  348     __asm__ ("mr\t%%r0,%3"                                              \
  349              : "=r" (__r0), "=r" (__r1)                                 \
  350              : "r"  (__r1),  "r" (m1));                                 \
  351     (xh) = __r0; (xl) = __r1;                                           \
  352   } while (0)
  353
      /* Same register pinning for division: n1 goes in register 0 and n0 in
         register 1; after "dr" register 1 is taken as the quotient and
         register 0 as the remainder.  */
  354 #define sdiv_qrnnd(q, r, n1, n0, d) \
  355   do {                                                                  \
  356     register SItype __r0 __asm__ ("0") = (n1);                          \
  357     register SItype __r1 __asm__ ("1") = (n0);                          \
  358                                                                         \
  359     __asm__ ("dr\t%%r0,%4"                                              \
  360              : "=r" (__r0), "=r" (__r1)                                 \
  361              : "r" (__r0), "r" (__r1), "r" (d));                        \
  362     (q) = __r1; (r) = __r0;                                             \
  363   } while (0)
  364 #endif /* __zarch__ */
  365 #endif /* __i370__ || __s390__ || __mvs__ */
 366
  367 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
      /* The asm templates use GCC's "{att|intel}" dialect alternatives, so
         each instruction is spelled with both operand orders.
         Double-word add: "add" on the low words, "adc" on the high words.
         ah and al are tied to the output registers ("0" / "1"), so the sums
         are formed in place.  */
  368 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  369   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
  370            : "=r" ((USItype) (sh)),                                     \
  371              "=&r" ((USItype) (sl))                                     \
  372            : "%0" ((USItype) (ah)),                                     \
  373              "g" ((USItype) (bh)),                                      \
  374              "%1" ((USItype) (al)),                                     \
  375              "g" ((USItype) (bl)))
      /* Double-word subtract: "sub" on the low words, "sbb" on the high
         words; the minuend words are tied to the outputs.  */
  376 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  377   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
  378            : "=r" ((USItype) (sh)),                                     \
  379              "=&r" ((USItype) (sl))                                     \
  380            : "0" ((USItype) (ah)),                                      \
  381              "g" ((USItype) (bh)),                                      \
  382              "1" ((USItype) (al)),                                      \
  383              "g" ((USItype) (bl)))
      /* "mul" with u tied to the "a" register: the low product word comes
         back in the "a" operand (w0) and the high word in the "d" operand
         (w1).  */
  384 #define umul_ppmm(w1, w0, u, v) \
  385   __asm__ ("mul{l} %3"                                                  \
  386            : "=a" ((USItype) (w0)),                                     \
  387              "=d" ((USItype) (w1))                                      \
  388            : "%0" ((USItype) (u)),                                      \
  389              "rm" ((USItype) (v)))
      /* "div" with n0 tied to the "a" operand and n1 to the "d" operand:
         the quotient comes back in "a" and the remainder in "d".  */
  390 #define udiv_qrnnd(q, r, n1, n0, dv) \
  391   __asm__ ("div{l} %4"                                                  \
  392            : "=a" ((USItype) (q)),                                      \
  393              "=d" ((USItype) (r))                                       \
  394            : "0" ((USItype) (n0)),                                      \
  395              "1" ((USItype) (n1)),                                      \
  396              "rm" ((USItype) (dv)))
  397 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
  398 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
  399 #define UMUL_TIME 40
  400 #define UDIV_TIME 40
  401 #endif /* 80x86 */
 402
 403 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
 404 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 405   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
 406            : "=r" ((UDItype) (sh)),                                     \
 407              "=&r" ((UDItype) (sl))                                     \
 408            : "%0" ((UDItype) (ah)),                                     \
 409              "rme" ((UDItype) (bh)),                                    \
 410              "%1" ((UDItype) (al)),                                     \
 411              "rme" ((UDItype) (bl)))
 412 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 413   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
 414            : "=r" ((UDItype) (sh)),                                     \
 415              "=&r" ((UDItype) (sl))                                     \
 416            : "0" ((UDItype) (ah)),                                      \
 417              "rme" ((UDItype) (bh)),                                    \
 418              "1" ((UDItype) (al)),                                      \
 419              "rme" ((UDItype) (bl)))
 420 #define umul_ppmm(w1, w0, u, v) \
 421   __asm__ ("mul{q} %3"                                                  \
 422            : "=a" ((UDItype) (w0)),                                     \
 423              "=d" ((UDItype) (w1))                                      \
 424            : "%0" ((UDItype) (u)),                                      \
 425              "rm" ((UDItype) (v)))
 426 #define udiv_qrnnd(q, r, n1, n0, dv) \
 427   __asm__ ("div{q} %4"                                                  \
 428            : "=a" ((UDItype) (q)),                                      \
 429              "=d" ((UDItype) (r))                                       \
 430            : "0" ((UDItype) (n0)),                                      \
 431              "1" ((UDItype) (n1)),                                      \
 432              "rm" ((UDItype) (dv)))
 433 #define count_leading_zeros(count, x)   ((count) = __builtin_clzl (x))
 434 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzl (x))
 435 #define UMUL_TIME 40
 436 #define UDIV_TIME 40
 437 #endif /* x86_64 */
 438
  439 #if defined (__i960__) && W_TYPE_SIZE == 32
      /* "emul" writes the 64-bit product into a UDItype operand.  umul_ppmm
         overlays that operand with two 32-bit words (__l declared first,
         then __h) and hands them out as w0 and w1.  Written as a GCC
         statement expression.  */
  440 #define umul_ppmm(w1, w0, u, v) \
  441   ({union {UDItype __ll;                                                \
  442            struct {USItype __l, __h;} __i;                              \
  443           } __xx;                                                       \
  444   __asm__ ("emul        %2,%1,%0"                                       \
  445            : "=d" (__xx.__ll)                                           \
  446            : "%dI" ((USItype) (u)),                                     \
  447              "dI" ((USItype) (v)));                                     \
  448   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
      /* Same "emul", but the statement expression evaluates to the whole
         UDItype product.  */
  449 #define __umulsidi3(u, v) \
  450   ({UDItype __w;                                                        \
  451     __asm__ ("emul      %2,%1,%0"                                       \
  452              : "=d" (__w)                                               \
  453              : "%dI" ((USItype) (u)),                                   \
  454                "dI" ((USItype) (v)));                                   \
  455     __w; })
  456 #endif /* __i960__ */
 457
  458 #if defined (__ia64) && W_TYPE_SIZE == 64
  459 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
  460    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
  461    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
  462    register, which takes an extra cycle.  */
      /* Note that al and bl each appear twice in the expansion, and ah and
         bh appear in both branches.  The low difference is held in __x until
         the high word has been computed, and only then stored to sl.  */
  463 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  464   do {                                                                  \
  465     UWtype __x;                                                         \
  466     __x = (al) - (bl);                                                  \
  467     if ((al) < (bl))                                                    \
  468       (sh) = (ah) - (bh) - 1;                                           \
  469     else                                                                \
  470       (sh) = (ah) - (bh);                                               \
  471     (sl) = __x;                                                         \
  472   } while (0)
  473
  474 /* Do both product parts in assembly, since that gives better code with
  475    all gcc versions.  Some callers will just use the upper part, and in
  476    that situation we waste an instruction, but not any cycles.  */
      /* All four operands use the "f" constraint; the high result is
         early-clobber because it is written before the inputs are read
         again by the second instruction.  */
  477 #define umul_ppmm(ph, pl, m0, m1)                                       \
  478   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
  479            : "=&f" (ph), "=f" (pl)                                      \
  480            : "f" (m0), "f" (m1))
      /* mux1 @rev and czx1.l yield _a, from which a byte-granular shift
         _c = (_a - 1) << 3 is derived.  After shifting x right by _c, the
         remainder is narrowed with compare-and-shift steps of 4, 2 and 1
         bits, and the count is W_TYPE_SIZE - 1 - _c.
         NOTE(review): depends on the exact semantics of mux1 / czx1.l,
         which are not visible here.  */
  481 #define count_leading_zeros(count, x)                                   \
  482   do {                                                                  \
  483     UWtype _x = (x), _y, _a, _c;                                        \
  484     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
  485     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
  486     _c = (_a - 1) << 3;                                                 \
  487     _x >>= _c;                                                          \
  488     if (_x >= 1 << 4)                                                   \
  489       _x >>= 4, _c += 4;                                                \
  490     if (_x >= 1 << 2)                                                   \
  491       _x >>= 2, _c += 2;                                                \
  492     _c += _x >> 1;                                                      \
  493     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  494   } while (0)
  495 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
  496    based, and we don't need a special case for x==0 here */
      /* (x - 1) & ~x is a mask of the bits below the lowest set bit of x, so
         its population count is the number of trailing zeros.  */
  497 #define count_trailing_zeros(count, x)                                  \
  498   do {                                                                  \
  499     UWtype __ctz_x = (x);                                               \
  500     __asm__ ("popcnt %0 = %1"                                           \
  501              : "=r" (count)                                             \
  502              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  503   } while (0)
  504 #define UMUL_TIME 14
  505 #endif /* __ia64 */
 506
  507 #if defined (__M32R__) && W_TYPE_SIZE == 32
      /* Double-word add using two "addx" instructions, low words first.  ah
         and al are tied to the output registers, so the sums are formed in
         place, and "cbit" is declared as clobbered.  */
  508 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  509   /* The cmp clears the condition bit.  */ \
  510   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
  511            : "=r" ((USItype) (sh)),                                     \
  512              "=&r" ((USItype) (sl))                                     \
  513            : "0" ((USItype) (ah)),                                      \
  514              "r" ((USItype) (bh)),                                      \
  515              "1" ((USItype) (al)),                                      \
  516              "r" ((USItype) (bl))                                       \
  517            : "cbit")
      /* Double-word subtract with the same structure, using "subx".  */
  518 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  519   /* The cmp clears the condition bit.  */ \
  520   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
  521            : "=r" ((USItype) (sh)),                                     \
  522              "=&r" ((USItype) (sl))                                     \
  523            : "0" ((USItype) (ah)),                                      \
  524              "r" ((USItype) (bh)),                                      \
  525              "1" ((USItype) (al)),                                      \
  526              "r" ((USItype) (bl))                                       \
  527            : "cbit")
  528 #endif /* __M32R__ */
 529
 530 #if defined (__mc68000__) && W_TYPE_SIZE == 32
 531 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* m68k two-word add: add.l of
        bl into sl, then addx.l of bh into sh.  sh, sl and bh live in data
        registers (d); bl may be any general operand (g).  ah and al are
        tied to sh and sl, and the % on their constraints marks each as
        commutative with the operand that follows it.  sl is earlyclobber
        because bh is still read after sl has been written.  */ \
 532   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 533            : "=d" ((USItype) (sh)),                                     \
 534              "=&d" ((USItype) (sl))                                     \
 535            : "%0" ((USItype) (ah)),                                     \
 536              "d" ((USItype) (bh)),                                      \
 537              "%1" ((USItype) (al)),                                     \
 538              "g" ((USItype) (bl)))
 539 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* m68k two-word subtract:
        sub.l of bl from sl, then subx.l of bh from sh.  ah and al are tied
        to sh and sl with plain matching constraints (no %, since the
        operands of a subtraction cannot be swapped).  sl is earlyclobber
        because bh is still read after sl has been written.  */ \
 540   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 541            : "=d" ((USItype) (sh)),                                     \
 542              "=&d" ((USItype) (sl))                                     \
 543            : "0" ((USItype) (ah)),                                      \
 544              "d" ((USItype) (bh)),                                      \
 545              "1" ((USItype) (al)),                                      \
 546              "g" ((USItype) (bl)))
 547
 548 /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
 549 #if (defined (__mc68020__) && !defined (__mc68060__))
 550 #define umul_ppmm(w1, w0, u, v) /* Single mulu.l producing the result in
        the register pair w1:w0.  u is tied to the register of w0 and is
        marked commutative with v; v may be a data register, a memory
        operand or an immediate (dmi).  */ \
 551   __asm__ ("mulu%.l %3,%1:%0"                                           \
 552            : "=d" ((USItype) (w0)),                                     \
 553              "=d" ((USItype) (w1))                                      \
 554            : "%0" ((USItype) (u)),                                      \
 555              "dmi" ((USItype) (v)))
 556 #define UMUL_TIME 45
 557 #define udiv_qrnnd(q, r, n1, n0, d) /* Single divu.l on the register pair
        r:q.  n0 is preloaded into the register of q and n1 into the
        register of r (matching constraints 0 and 1); d may be a data
        register, a memory operand or an immediate (dmi).  */ \
 558   __asm__ ("divu%.l %4,%1:%0"                                           \
 559            : "=d" ((USItype) (q)),                                      \
 560              "=d" ((USItype) (r))                                       \
 561            : "0" ((USItype) (n0)),                                      \
 562              "1" ((USItype) (n1)),                                      \
 563              "dmi" ((USItype) (d)))
 564 #define UDIV_TIME 90
 565 #define sdiv_qrnnd(q, r, n1, n0, d) /* Same operand layout as the divu.l
        based udiv_qrnnd for this target, but issuing divs.l: n0 starts in
        the register of q, n1 in the register of r, and d may be a data
        register, memory operand or immediate.  The operands are still
        cast to USItype in the constraint list.  */ \
 566   __asm__ ("divs%.l %4,%1:%0"                                           \
 567            : "=d" ((USItype) (q)),                                      \
 568              "=d" ((USItype) (r))                                       \
 569            : "0" ((USItype) (n0)),                                      \
 570              "1" ((USItype) (n1)),                                      \
 571              "dmi" ((USItype) (d)))
 572
 573 #elif defined (__mcoldfire__) /* not mc68020 */
 574
 575 #define umul_ppmm(xh, xl, a, b) /* ColdFire 32x32->64 multiply built from
        four 16x16 mulu partial products.  a and b are copied into d0/d1,
        duplicated into d2/d3 and swapped so that each half is available
        in a low word; d4 receives low*low, d2 and d3 the two cross
        products, d1 high*high.  The upper half of low*low and both cross
        products are summed in d2, a carry out of that sum adds 65536 to
        d1, and the pieces are then reassembled into xl (operand 1) and
        xh (operand 0).  d0-d4 are scratch and listed as clobbers.  This
        variant clears the low word of d0 with clr.w; the non-ColdFire
        version that follows uses eor.w at the same step.  */ \
 576   __asm__ ("| Inlined umul_ppmm\n"                                      \
 577            "    move%.l %2,%/d0\n"                                      \
 578            "    move%.l %3,%/d1\n"                                      \
 579            "    move%.l %/d0,%/d2\n"                                    \
 580            "    swap    %/d0\n"                                         \
 581            "    move%.l %/d1,%/d3\n"                                    \
 582            "    swap    %/d1\n"                                         \
 583            "    move%.w %/d2,%/d4\n"                                    \
 584            "    mulu    %/d3,%/d4\n"                                    \
 585            "    mulu    %/d1,%/d2\n"                                    \
 586            "    mulu    %/d0,%/d3\n"                                    \
 587            "    mulu    %/d0,%/d1\n"                                    \
 588            "    move%.l %/d4,%/d0\n"                                    \
 589            "    clr%.w  %/d0\n"                                         \
 590            "    swap    %/d0\n"                                         \
 591            "    add%.l  %/d0,%/d2\n"                                    \
 592            "    add%.l  %/d3,%/d2\n"                                    \
 593            "    jcc     1f\n"                                           \
 594            "    add%.l  %#65536,%/d1\n"                                 \
 595            "1:  swap    %/d2\n"                                         \
 596            "    moveq   %#0,%/d0\n"                                     \
 597            "    move%.w %/d2,%/d0\n"                                    \
 598            "    move%.w %/d4,%/d2\n"                                    \
 599            "    move%.l %/d2,%1\n"                                      \
 600            "    add%.l  %/d1,%/d0\n"                                    \
 601            "    move%.l %/d0,%0"                                        \
 602            : "=g" ((USItype) (xh)),                                     \
 603              "=g" ((USItype) (xl))                                      \
 604            : "g" ((USItype) (a)),                                       \
 605              "g" ((USItype) (b))                                        \
 606            : "d0", "d1", "d2", "d3", "d4")
 607 #define UMUL_TIME 100
 608 #define UDIV_TIME 400
 609 #else /* not ColdFire */
 610 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
 611 #define umul_ppmm(xh, xl, a, b) /* 32x32->64 multiply for m68k parts
        without mulu.l, built from four 16x16 mulu partial products.  a
        and b are copied into d0/d1, duplicated into d2/d3 and swapped so
        that each half is available in a low word; d4 receives low*low,
        d2 and d3 the two cross products, d1 high*high.  The upper half of
        low*low and both cross products are summed in d2, a carry out of
        that sum adds 65536 to d1, and the pieces are reassembled into xl
        (operand 1) and xh (operand 0).  d0-d4 are scratch and listed as
        clobbers.  The low word of d0 is cleared with eor.w here, where
        the ColdFire variant above uses clr.w.  */ \
 612   __asm__ ("| Inlined umul_ppmm\n"                                      \
 613            "    move%.l %2,%/d0\n"                                      \
 614            "    move%.l %3,%/d1\n"                                      \
 615            "    move%.l %/d0,%/d2\n"                                    \
 616            "    swap    %/d0\n"                                         \
 617            "    move%.l %/d1,%/d3\n"                                    \
 618            "    swap    %/d1\n"                                         \
 619            "    move%.w %/d2,%/d4\n"                                    \
 620            "    mulu    %/d3,%/d4\n"                                    \
 621            "    mulu    %/d1,%/d2\n"                                    \
 622            "    mulu    %/d0,%/d3\n"                                    \
 623            "    mulu    %/d0,%/d1\n"                                    \
 624            "    move%.l %/d4,%/d0\n"                                    \
 625            "    eor%.w  %/d0,%/d0\n"                                    \
 626            "    swap    %/d0\n"                                         \
 627            "    add%.l  %/d0,%/d2\n"                                    \
 628            "    add%.l  %/d3,%/d2\n"                                    \
 629            "    jcc     1f\n"                                           \
 630            "    add%.l  %#65536,%/d1\n"                                 \
 631            "1:  swap    %/d2\n"                                         \
 632            "    moveq   %#0,%/d0\n"                                     \
 633            "    move%.w %/d2,%/d0\n"                                    \
 634            "    move%.w %/d4,%/d2\n"                                    \
 635            "    move%.l %/d2,%1\n"                                      \
 636            "    add%.l  %/d1,%/d0\n"                                    \
 637            "    move%.l %/d0,%0"                                        \
 638            : "=g" ((USItype) (xh)),                                     \
 639              "=g" ((USItype) (xl))                                      \
 640            : "g" ((USItype) (a)),                                       \
 641              "g" ((USItype) (b))                                        \
 642            : "d0", "d1", "d2", "d3", "d4")
 643 #define UMUL_TIME 100
 644 #define UDIV_TIME 400
 645
 646 #endif /* not mc68020 */
 647
 648 /* The '020, '030, '040 and '060 have bitfield insns.
 649    cpu32 disguises as a 68020, but lacks them.  */
 650 #if defined (__mc68020__) && !defined (__mcpu32__)
 651 #define count_leading_zeros(count, x) /* One bfffo on x; both fields inside
        the braces are printed from operand 2, the constant 0.  x may be
        an offsettable memory operand or a data register (od) and the
        result goes to a data register.  No COUNT_LEADING_ZEROS_0 is
        defined on this branch.  */ \
 652   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 653            : "=d" ((USItype) (count))                                   \
 654            : "od" ((USItype) (x)), "n" (0))
 655 /* Some ColdFire architectures have a ff1 instruction supported via
 656    __builtin_clz. */
 657 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 658 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
      /* NOTE(review): presumably the value count_leading_zeros yields for a
         zero argument on this target -- confirm against the documentation
         at the top of this header.  */
 659 #define COUNT_LEADING_ZEROS_0 32
 660 #endif
 661 #endif /* mc68000 */
 662
 663 #if defined (__m88000__) && W_TYPE_SIZE == 32
 664 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* m88k two-word add: addu.co
        of al and bl into sl, then addu.ci of ah and bh into sh.  No
        operand is tied to an output; ah and al are marked commutative (%)
        with bh and bl.  sl is earlyclobber because ah and bh are read
        after it is written.  NOTE(review): rJ together with the %r
        modifier presumably lets a constant zero be printed as r0 --
        confirm in the m88k back end.  */ \
 665   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 666            : "=r" ((USItype) (sh)),                                     \
 667              "=&r" ((USItype) (sl))                                     \
 668            : "%rJ" ((USItype) (ah)),                                    \
 669              "rJ" ((USItype) (bh)),                                     \
 670              "%rJ" ((USItype) (al)),                                    \
 671              "rJ" ((USItype) (bl)))
 672 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* m88k two-word subtract:
        subu.co of al and bl into sl, then subu.ci of ah and bh into sh.
        All four inputs use the rJ constraint without the commutative
        marker.  sl is earlyclobber because ah and bh are read after it
        is written.  */ \
 673   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 674            : "=r" ((USItype) (sh)),                                     \
 675              "=&r" ((USItype) (sl))                                     \
 676            : "rJ" ((USItype) (ah)),                                     \
 677              "rJ" ((USItype) (bh)),                                     \
 678              "rJ" ((USItype) (al)),                                     \
 679              "rJ" ((USItype) (bl)))
 680 #define count_leading_zeros(count, x) /* Runs ff1 on x into the temporary
        __cbtmp and stores __cbtmp ^ 31 in count, i.e. 31 minus the ff1
        result whenever that result lies in 0..31.  */ \
 681   do {                                                                  \
 682     USItype __cbtmp;                                                    \
 683     __asm__ ("ff1 %0,%1"                                                \
 684              : "=r" (__cbtmp)                                           \
 685              : "r" ((USItype) (x)));                                    \
 686     (count) = __cbtmp ^ 31;                                             \
 687   } while (0)
      /* NOTE(review): 63 equals 32 ^ 31, so ff1 presumably reports 32 for a
         zero input -- confirm in the m88k manual.  */
 688 #define COUNT_LEADING_ZEROS_0 63 /* sic */
 689 #if defined (__mc88110__)
 690 #define umul_ppmm(wh, wl, u, v) /* mc88110 only: one mulu.d writes a
        64-bit result into the union member __ll; the two USItype halves
        are then read back through the overlaid struct, __h (declared
        first) into wh and __l into wl.  */ \
 691   do {                                                                  \
 692     union {UDItype __ll;                                                \
 693            struct {USItype __h, __l;} __i;                              \
 694           } __xx;                                                       \
 695     __asm__ ("mulu.d    %0,%1,%2"                                       \
 696              : "=r" (__xx.__ll)                                         \
 697              : "r" ((USItype) (u)),                                     \
 698                "r" ((USItype) (v)));                                    \
 699     (wh) = __xx.__i.__h;                                                \
 700     (wl) = __xx.__i.__l;                                                \
 701   } while (0)
 702 #define udiv_qrnnd(q, r, n1, n0, d) /* mc88110 only: n1 and n0 are packed
        into a 64-bit union (__h = n1, __l = n0), divu.d divides it by d
        into __q, and the remainder is reconstructed in C as
        n0 - __q * d.  Note that n0 and d are each expanded twice, so
        arguments with side effects are evaluated twice.  */ \
 703   ({union {UDItype __ll;                                                \
 704            struct {USItype __h, __l;} __i;                              \
 705           } __xx;                                                       \
 706   USItype __q;                                                          \
 707   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 708   __asm__ ("divu.d %0,%1,%2"                                            \
 709            : "=r" (__q)                                                 \
 710            : "r" (__xx.__ll),                                           \
 711              "r" ((USItype) (d)));                                      \
 712   (r) = (n0) - __q * (d); (q) = __q; })
 713 #define UMUL_TIME 5
 714 #define UDIV_TIME 25
 715 #else
 716 #define UMUL_TIME 17
 717 #define UDIV_TIME 150
 718 #endif /* __mc88110__ */
 719 #endif /* __m88000__ */
 720
 721 #if defined (__mn10300__) /* MN10300 family.  The asm statements in this
        section use the __asm__ spelling, as the other targets in this
        header do, so that they still compile when the including file is
        built with -ansi or a strict -std= option, where the plain asm
        keyword is not recognised.  */
 722 # if defined (__AM33__)
 723 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
      /* AM33: four-operand mulu/mul writing w1 and w0 directly from
         register inputs u and v.  */
 724 #  define umul_ppmm(w1, w0, u, v)               \
 725     __asm__("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 726 #  define smul_ppmm(w1, w0, u, v)               \
 727     __asm__("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 728 # else
      /* Without AM33: two-operand mulu/mul preceded by two nops; u is tied
         to the register of w0 (commutative with v) and w1 is taken from
         the register class selected by the z constraint.  */
 729 #  define umul_ppmm(w1, w0, u, v)               \
 730     __asm__("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 731 #  define smul_ppmm(w1, w0, u, v)               \
 732     __asm__("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 733 # endif
      /* Two-word add and subtract are written in C: the halves are packed
         into DWunion values, combined through the double-word member ll,
         and unpacked again into sl and sh.  */
 734 # define add_ssaaaa(sh, sl, ah, al, bh, bl)     \
 735   do {                                          \
 736     DWunion __s, __a, __b;                      \
 737     __a.s.low = (al); __a.s.high = (ah);        \
 738     __b.s.low = (bl); __b.s.high = (bh);        \
 739     __s.ll = __a.ll + __b.ll;                   \
 740     (sl) = __s.s.low; (sh) = __s.s.high;        \
 741   } while (0)
 742 # define sub_ddmmss(sh, sl, ah, al, bh, bl)     \
 743   do {                                          \
 744     DWunion __s, __a, __b;                      \
 745     __a.s.low = (al); __a.s.high = (ah);        \
 746     __b.s.low = (bl); __b.s.high = (bh);        \
 747     __s.ll = __a.ll - __b.ll;                   \
 748     (sl) = __s.s.low; (sh) = __s.s.high;        \
 749   } while (0)
      /* Division: nl is tied to the register of q and nh to the register
         of r; d is passed in a D-class register.  */
 750 # define udiv_qrnnd(q, r, nh, nl, d)            \
 751   __asm__("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 752 # define sdiv_qrnnd(q, r, nh, nl, d)            \
 753   __asm__("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 754 # define UMUL_TIME 3
 755 # define UDIV_TIME 38
 756 #endif
 757
 758 #if defined (__mips__) && W_TYPE_SIZE == 32
 759 #define umul_ppmm(w1, w0, u, v) /* 32x32->64 unsigned multiply written in
        plain C: both operands are truncated to USItype, the first is
        widened to UDItype so that the product is formed in 64 bits, and
        the upper and lower 32 bits are stored to w1 and w0.  */ \
 760   do {                                                                  \
 761     UDItype __prod = (UDItype) (USItype) (u);                           \
 762     __prod *= (USItype) (v);                                            \
 763     (w1) = (USItype) (__prod >> 32); (w0) = (USItype) (__prod);         \
 764   } while (0)
 765 #define UMUL_TIME 10
 766 #define UDIV_TIME 100
 767
 768 #if (__mips == 32 || __mips == 64) && ! __mips16 /* Only when __mips is 32
        or 64 and __mips16 is zero or undefined: count_leading_zeros is
        then delegated to __builtin_clz.  */
 769 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
      /* NOTE(review): presumably the value produced for a zero argument on
         this target -- confirm against the documentation at the top of
         this header.  */
 770 #define COUNT_LEADING_ZEROS_0 32
 771 #endif
 772 #endif /* __mips__ */
 773
 774 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 775 #define umul_ppmm(w1, w0, u, v) /* ns32000: one meid writes a 64-bit
        result into the union member __ll, with u tied to that same
        operand (commutative with v).  The halves are read back through
        the overlaid struct, whose first member is __l: __h goes to w1 and
        __l to w0.  */ \
 776   ({union {UDItype __ll;                                                \
 777            struct {USItype __l, __h;} __i;                              \
 778           } __xx;                                                       \
 779   __asm__ ("meid %2,%0"                                                 \
 780            : "=g" (__xx.__ll)                                           \
 781            : "%0" ((USItype) (u)),                                      \
 782              "g" ((USItype) (v)));                                      \
 783   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 784 #define __umulsidi3(u, v) /* Statement expression whose value is the
        UDItype __w written by a single meid; u is tied to the output
        operand and marked commutative with v.  */ \
 785   ({UDItype __w;                                                        \
 786     __asm__ ("meid %2,%0"                                               \
 787              : "=g" (__w)                                               \
 788              : "%0" ((USItype) (u)),                                    \
 789                "g" ((USItype) (v)));                                    \
 790     __w; })
 791 #define udiv_qrnnd(q, r, n1, n0, d) /* ns32000: n1 and n0 are packed into
        a 64-bit union (__h = n1, __l = n0), deid operates on it in place
        (the input is tied to the output), and afterwards the __l half is
        stored to r and the __h half to q.  */ \
 792   ({union {UDItype __ll;                                                \
 793            struct {USItype __l, __h;} __i;                              \
 794           } __xx;                                                       \
 795   __xx.__i.__h = (n1); __xx.__i.__l = (n0);                             \
 796   __asm__ ("deid %2,%0"                                                 \
 797            : "=g" (__xx.__ll)                                           \
 798            : "0" (__xx.__ll),                                           \
 799              "g" ((USItype) (d)));                                      \
 800   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 801 #define count_trailing_zeros(count,x) /* One ffsd with x as the source;
        the destination register is count and is preloaded with the
        constant 0 through the matching constraint.  */ \
 802   do {                                                                  \
 803     __asm__ ("ffsd     %2,%0"                                           \
 804             : "=r" ((USItype) (count))                                  \
 805             : "0" ((USItype) 0),                                        \
 806               "r" ((USItype) (x)));                                     \
 807   } while (0)
 808 #endif /* __ns32000__ */
 809
 810 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 811    system vendor compilers.
 812    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 813    enough, since that hits ARM and m68k too.  */
 814 #if (defined (_ARCH_PPC)        /* AIX */                               \
 815      || defined (_ARCH_PWR)     /* AIX */                               \
 816      || defined (_ARCH_COM)     /* AIX */                               \
 817      || defined (__powerpc__)   /* gcc */                               \
 818      || defined (__POWERPC__)   /* BEOS */                              \
 819      || defined (__ppc__)       /* Darwin */                            \
 820      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 821      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 822          && CPU_FAMILY == PPC)                                                \
 823      ) && W_TYPE_SIZE == 32
 824 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* Two-word add with three asm
        forms selected by __builtin_constant_p: when bh is a constant 0
        the high word is finished with {aze|addze}, when bh is a constant
        with all bits set with {ame|addme}, and otherwise with {ae|adde}
        taking bh in a register.  Every form first adds al and bl into sl,
        which is earlyclobber because the second instruction still reads
        ah.  NOTE(review): the {x|y} pairs look like assembler-dialect
        alternatives and %I presumably emits the immediate-form suffix
        when bl matched the I constraint -- confirm against the rs6000
        back end.  */ \
 825   do {                                                                  \
 826     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 827       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 828              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 829     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 830       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 831              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 832     else                                                                \
 833       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 834              : "=r" (sh), "=&r" (sl)                                    \
 835              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 836   } while (0)
 837 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* Two-word subtract with five
        asm forms selected by __builtin_constant_p.  The low word is
        always a carrying subtract-from of bl and al into sl.  The high
        word uses {sfze|subfze} on bh when ah is a constant 0,
        {sfme|subfme} on bh when ah is a constant with all bits set,
        {ame|addme} on ah when bh is a constant 0, {aze|addze} on ah when
        bh is a constant with all bits set, and {sfe|subfe} with both in
        registers otherwise.  sl is earlyclobber because the second
        instruction still reads a high-word input.  */ \
 838   do {                                                                  \
 839     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 840       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 841                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 842     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
 843       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 844                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 845     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 846       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 847                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 848     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
 849       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 850                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 851     else                                                                \
 852       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 853                : "=r" (sh), "=&r" (sl)                                  \
 854                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 855   } while (0)
 856 #define count_leading_zeros(count, x) /* One {cntlz|cntlzw} from register
        x into register count; neither operand is cast, so both are used
        with whatever type the caller supplies.  */ \
 857   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 858 #define COUNT_LEADING_ZEROS_0 32
 859 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 860   || defined (__ppc__)                                                    \
 861   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 862   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 863          && CPU_FAMILY == PPC)
 864 #define umul_ppmm(ph, pl, m0, m1) /* 32x32->64 unsigned multiply: mulhwu
        supplies the high word in ph and a plain C multiplication the low
        word in pl.  Both arguments are captured once in USItype
        temporaries and the asm is fed those temporaries, so each argument
        is evaluated exactly once and the instruction always sees a
        32-bit value, whatever the argument type.  */ \
 865   do {                                                                  \
 866     USItype __m0 = (m0), __m1 = (m1);                                   \
 867     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 868     (pl) = __m0 * __m1;                                                 \
 869   } while (0)
 870 #define UMUL_TIME 15
 871 #define smul_ppmm(ph, pl, m0, m1) /* 32x32->64 signed multiply: mulhw
        supplies the high word in ph and a plain C multiplication the low
        word in pl.  Both arguments are captured once in SItype
        temporaries and the asm is fed those temporaries, so each argument
        is evaluated exactly once and the instruction always sees a
        32-bit value, whatever the argument type.  */ \
 872   do {                                                                  \
 873     SItype __m0 = (m0), __m1 = (m1);                                    \
 874     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 875     (pl) = __m0 * __m1;                                                 \
 876   } while (0)
 877 #define SMUL_TIME 14
 878 #define UDIV_TIME 120
 879 #elif defined (_ARCH_PWR)
 880 #define UMUL_TIME 8
 881 #define smul_ppmm(xh, xl, m0, m1) /* _ARCH_PWR only: a single mul with xh
        in a general register and xl in the register selected by the q
        constraint.  NOTE(review): q is presumably the POWER MQ register
        -- confirm in the rs6000 constraint documentation.  */ \
 882   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 883 #define SMUL_TIME 4
 884 #define sdiv_qrnnd(q, r, nh, nl, d) /* _ARCH_PWR only: a single div with
        nh and d in general registers; nl is preloaded into the q-class
        register that also receives r (matching constraint 1), and the
        quotient is written to q.  */ \
 885   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 886 #define UDIV_TIME 100
 887 #endif
 888 #endif /* 32-bit POWER architecture variants.  */
 889
 890 /* We should test _IBMR2 here when we add assembly support for the system
 891    vendor compilers.  */
 892 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 893 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* 64-bit-word variant of the
        two-word add, with three asm forms selected by
        __builtin_constant_p: {aze|addze} when bh is a constant 0,
        {ame|addme} when bh is a constant equal to ~(UDItype) 0, and
        {ae|adde} with bh in a register otherwise.  Every form first adds
        al and bl into sl, which is earlyclobber because the second
        instruction still reads ah.  */ \
 894   do {                                                                  \
 895     if (__builtin_constant_p (bh) && (bh) == 0)                         \
 896       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 897              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 898     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 899       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 900              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 901     else                                                                \
 902       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 903              : "=r" (sh), "=&r" (sl)                                    \
 904              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 905   } while (0)
 906 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* 64-bit-word variant of the
        two-word subtract, with five asm forms selected by
        __builtin_constant_p.  The low word is always a carrying
        subtract-from of bl and al into sl.  The high word uses
        {sfze|subfze} on bh when ah is a constant 0, {sfme|subfme} on bh
        when ah equals ~(UDItype) 0, {ame|addme} on ah when bh is a
        constant 0, {aze|addze} on ah when bh equals ~(UDItype) 0, and
        {sfe|subfe} with both in registers otherwise.  sl is earlyclobber
        because the second instruction still reads a high-word input.  */ \
 907   do {                                                                  \
 908     if (__builtin_constant_p (ah) && (ah) == 0)                         \
 909       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 910                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 911     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)         \
 912       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 913                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 914     else if (__builtin_constant_p (bh) && (bh) == 0)                    \
 915       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 916                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 917     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)         \
 918       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 919                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 920     else                                                                \
 921       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 922                : "=r" (sh), "=&r" (sl)                                  \
 923                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 924   } while (0)
 925 #define count_leading_zeros(count, x) /* One cntlzd from register x into
        register count; neither operand is cast.  */ \
 926   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 927 #define COUNT_LEADING_ZEROS_0 64
 928 #define umul_ppmm(ph, pl, m0, m1) /* 64x64->128 unsigned multiply: mulhdu
        supplies the high word in ph and a plain C multiplication the low
        word in pl.  Both arguments are captured once in UDItype
        temporaries and the asm is fed those temporaries, so each argument
        is evaluated exactly once and the instruction always sees a
        64-bit value, whatever the argument type.  */ \
 929   do {                                                                  \
 930     UDItype __m0 = (m0), __m1 = (m1);                                   \
 931     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
 932     (pl) = __m0 * __m1;                                                 \
 933   } while (0)
 934 #define UMUL_TIME 15
 935 #define smul_ppmm(ph, pl, m0, m1) /* 64x64->128 signed multiply: mulhd
        supplies the high word in ph and a plain C multiplication the low
        word in pl.  Both arguments are captured once in DItype
        temporaries and the asm is fed those temporaries, so each argument
        is evaluated exactly once and the instruction always sees a
        64-bit value, whatever the argument type.  */ \
 936   do {                                                                  \
 937     DItype __m0 = (m0), __m1 = (m1);                                    \
 938     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
 939     (pl) = __m0 * __m1;                                                 \
 940   } while (0)
 941 #define SMUL_TIME 14  /* ??? */
 942 #define UDIV_TIME 120 /* ??? */
 943 #endif /* 64-bit PowerPC.  */
 944
 945 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 946 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* RT/ROMP two-word add: a of
        bl into sl, then ae of bh into sh.  ah and al are tied to sh and
        sl and marked commutative (%) with bh and bl; sl is earlyclobber
        because bh is read after sl has been written.  */ \
 947   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 948            : "=r" ((USItype) (sh)),                                     \
 949              "=&r" ((USItype) (sl))                                     \
 950            : "%0" ((USItype) (ah)),                                     \
 951              "r" ((USItype) (bh)),                                      \
 952              "%1" ((USItype) (al)),                                     \
 953              "r" ((USItype) (bl)))
 954 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* RT/ROMP two-word subtract:
        s of bl from sl, then se of bh from sh.  ah and al are tied to sh
        and sl with plain matching constraints; sl is earlyclobber because
        bh is read after sl has been written.  */ \
 955   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 956            : "=r" ((USItype) (sh)),                                     \
 957              "=&r" ((USItype) (sl))                                     \
 958            : "0" ((USItype) (ah)),                                      \
 959              "r" ((USItype) (bh)),                                      \
 960              "1" ((USItype) (al)),                                      \
 961              "r" ((USItype) (bl)))
 962 #define umul_ppmm(ph, pl, m0, m1) /* RT/ROMP multiply.  The asm zeroes r2,
        loads __m0 into r10 with mts, issues sixteen m steps against __m1,
        then copies r2 to ph with cas and reads r10 back into pl with
        mfs; r2 is listed as a clobber.  Afterwards C code adds __m1 to ph
        when the top bit of __m0 is set and __m0 when the top bit of __m1
        is set.  NOTE(review): that adjustment presumably converts a
        signed product into the unsigned one -- confirm against the ROMP
        instruction documentation.  */ \
 963   do {                                                                  \
 964     USItype __m0 = (m0), __m1 = (m1);                                   \
 965     __asm__ (                                                           \
 966        "s       r2,r2\n"                                                \
 967 "       mts     r10,%2\n"                                               \
 968 "       m       r2,%3\n"                                                \
 969 "       m       r2,%3\n"                                                \
 970 "       m       r2,%3\n"                                                \
 971 "       m       r2,%3\n"                                                \
 972 "       m       r2,%3\n"                                                \
 973 "       m       r2,%3\n"                                                \
 974 "       m       r2,%3\n"                                                \
 975 "       m       r2,%3\n"                                                \
 976 "       m       r2,%3\n"                                                \
 977 "       m       r2,%3\n"                                                \
 978 "       m       r2,%3\n"                                                \
 979 "       m       r2,%3\n"                                                \
 980 "       m       r2,%3\n"                                                \
 981 "       m       r2,%3\n"                                                \
 982 "       m       r2,%3\n"                                                \
 983 "       m       r2,%3\n"                                                \
 984 "       cas     %0,r2,r0\n"                                             \
 985 "       mfs     r10,%1"                                                 \
 986              : "=r" ((USItype) (ph)),                                   \
 987                "=r" ((USItype) (pl))                                    \
 988              : "%r" (__m0),                                             \
 989                 "r" (__m1)                                              \
 990              : "r2");                                                   \
 991     (ph) += ((((SItype) __m0 >> 31) & __m1)                             \
 992              + (((SItype) __m1 >> 31) & __m0));                         \
 993   } while (0)
 994 #define UMUL_TIME 20
 995 #define UDIV_TIME 200
 996 #define count_leading_zeros(count, x) /* Splits the 32-bit case in two:
        when x is at least 0x10000 the clz instruction is applied to
        x >> 16; otherwise it is applied to x itself and 16 is added to
        the result.  x and count are each expanded more than once.
        NOTE(review): the split suggests clz only examines 16 bits --
        confirm against the ROMP instruction documentation.  */ \
 997   do {                                                                  \
 998     if ((x) >= 0x10000)                                                 \
 999       __asm__ ("clz     %0,%1"                                          \
1000                : "=r" ((USItype) (count))                               \
1001                : "r" ((USItype) (x) >> 16));                            \
1002     else                                                                \
1003       {                                                                 \
1004         __asm__ ("clz   %0,%1"                                          \
1005                  : "=r" ((USItype) (count))                             \
1006                  : "r" ((USItype) (x)));                                        \
1007         (count) += 16;                                                  \
1008       }                                                                 \
1009   } while (0)
1010 #endif
1011
1012 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1013 #ifndef __sh1__
1014 #define umul_ppmm(w1, w0, u, v) /* SH (not sh1): dmulu.l on u and v, then
        sts copies macl into w0 and mach into w1; macl and mach are listed
        as clobbers.  The outputs accept a register or an autodecrement
        memory operand (r<), which is why the sts mnemonics carry the %M
        operand modifier.  */ \
1015   __asm__ (                                                             \
1016        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1017            : "=r<" ((USItype)(w1)),                                     \
1018              "=r<" ((USItype)(w0))                                      \
1019            : "r" ((USItype)(u)),                                        \
1020              "r" ((USItype)(v))                                         \
1021            : "macl", "mach")
1022 #define UMUL_TIME 5
1023 #endif
1024
1025 /* This is the same algorithm as __udiv_qrnnd_c.  */
1026 #define UDIV_NEEDS_NORMALIZATION 1
1027
1028 #define udiv_qrnnd(q, r, n1, n0, d) /* SH division by way of the hidden
        helper __udiv_qrnnd_16, which is declared locally and whose
        address is passed as operand 5.  The asm loads d into r5, places
        swapped copies of n0 and d in r4 and r6, and calls the helper
        twice with jsr, combining the two r1 results into q with swap.w
        and or.  r is bound to the z register class and preloaded with n1.
        r1, r2, r4, r5, r6, pr and t are declared clobbered.  */ \
1029   do {                                                                  \
1030     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1031                         __attribute__ ((visibility ("hidden")));        \
1032     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1033     __asm__ (                                                           \
1034         "mov%M4 %4,r5\n"                                                \
1035 "       swap.w %3,r4\n"                                                 \
1036 "       swap.w r5,r6\n"                                                 \
1037 "       jsr @%5\n"                                                      \
1038 "       shll16 r6\n"                                                    \
1039 "       swap.w r4,r4\n"                                                 \
1040 "       jsr @%5\n"                                                      \
1041 "       swap.w r1,%0\n"                                                 \
1042 "       or r1,%0"                                                       \
1043         : "=r" (q), "=&z" (r)                                           \
1044         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1045         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1046   } while (0)
1047
1048 #define UDIV_TIME 80
1049
/* sub_ddmmss for SH: two-word subtraction (sh,sl) = (ah,al) - (bh,bl).
   clrt clears the T bit, then subc is applied to the low words and again
   to the high words.  ah and al are tied to the output registers
   (constraints "0" and "1"); the T bit is declared clobbered.  */
1050 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1051   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1052            : "=r" (sh), "=r" (sl)                                       \
1053            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1054
1055 #endif /* __sh__ */
1056
1057 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening multiply for SHmedia: both operands are converted to USItype
   and the product is formed in UDItype.  The arguments are parenthesized
   so that an expression argument such as `a + b' is converted as a whole
   instead of only its first operand being cast.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* count_leading_zeros for SHmedia: x is converted to USItype and widened
   into a UDItype temporary, the nsb instruction is executed on it, and
   the signed result minus 31 is stored in count.
   NOTE(review): the -31 adjustment presumably accounts for nsb operating
   on the 64-bit register -- confirm against the SHmedia ISA.  */
1059 #define count_leading_zeros(count, x) \
1060   do                                                                    \
1061     {                                                                   \
1062       UDItype x_ = (USItype)(x);                                        \
1063       SItype c_;                                                        \
1064                                                                         \
1065       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1066       (count) = c_ - 31;                                                \
1067     }                                                                   \
1068   while (0)
1069 #define COUNT_LEADING_ZEROS_0 32
1070 #endif
1071
1072 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1073     && W_TYPE_SIZE == 32
/* add_ssaaaa for 32-bit SPARC: two-word addition (sh,sl) = (ah,al) + (bh,bl).
   addcc adds the low words into sl and sets the condition codes; addx
   then adds the high words into sh.  sl is earlyclobber ("=&r") because it
   is written before the high-word operands are read.  The leading '%' in
   the constraints marks ah/bh and al/bl as commutative pairs.
   __CLOBBER_CC is defined elsewhere in this file.  */
1074 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1075   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1076            : "=r" ((USItype) (sh)),                                     \
1077              "=&r" ((USItype) (sl))                                     \
1078            : "%rJ" ((USItype) (ah)),                                    \
1079              "rI" ((USItype) (bh)),                                     \
1080              "%rJ" ((USItype) (al)),                                    \
1081              "rI" ((USItype) (bl))                                      \
1082            __CLOBBER_CC)
/* sub_ddmmss for 32-bit SPARC: two-word subtraction
   (sh,sl) = (ah,al) - (bh,bl).  subcc subtracts the low words into sl and
   sets the condition codes; subx then subtracts the high words into sh.
   sl is earlyclobber because it is written before the high-word operands
   are read.  Unlike add_ssaaaa, the operands are not marked commutative.  */
1083 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1084   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1085            : "=r" ((USItype) (sh)),                                     \
1086              "=&r" ((USItype) (sl))                                     \
1087            : "rJ" ((USItype) (ah)),                                     \
1088              "rI" ((USItype) (bh)),                                     \
1089              "rJ" ((USItype) (al)),                                     \
1090              "rI" ((USItype) (bl))                                      \
1091            __CLOBBER_CC)
1092 #if defined (__sparc_v8__)
/* umul_ppmm for SPARC v8: umul writes its result for u * v into w0, and
   the following rd copies the %y register into w1.
   NOTE(review): %y presumably holds the high 32 bits of the product
   after umul -- confirm against the SPARC v8 manual.  */
1093 #define umul_ppmm(w1, w0, u, v) \
1094   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1095            : "=r" ((USItype) (w1)),                                     \
1096              "=r" ((USItype) (w0))                                      \
1097            : "r" ((USItype) (u)),                                       \
1098              "r" ((USItype) (v)))
/* udiv_qrnnd for SPARC v8: __n1 is moved into %y, three nops follow, and
   udiv divides using __n0 and __d to produce __q.  The remainder is then
   recomputed as __n0 - __q * __d (umul followed by sub) into __r.
   Both outputs are earlyclobber because they are written while inputs
   are still needed.
   NOTE(review): the nops presumably cover the write-to-%y delay, and udiv
   presumably uses %y as the high word of the dividend -- confirm.  */
1099 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1100   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1101            : "=&r" ((USItype) (__q)),                                   \
1102              "=&r" ((USItype) (__r))                                    \
1103            : "r" ((USItype) (__n1)),                                    \
1104              "r" ((USItype) (__n0)),                                    \
1105              "r" ((USItype) (__d)))
1106 #else
1107 #if defined (__sparclite__)
1108 /* This has hardware multiply but not divide.  It also has two additional
1109    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm for SPARClite: same instruction sequence as the v8 variant.
   umul writes its result for u * v into w0, and rd copies %y into w1.
   NOTE(review): %y presumably holds the high word of the product.  */
1110 #define umul_ppmm(w1, w0, u, v) \
1111   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1112            : "=r" ((USItype) (w1)),                                     \
1113              "=r" ((USItype) (w0))                                      \
1114            : "r" ((USItype) (u)),                                       \
1115              "r" ((USItype) (v)))
/* udiv_qrnnd for SPARClite: divides the two-word value n1:n0 by d using
   32 divscc steps.  n1 is written to %y, tst %g0 sets the condition
   codes, and the steps accumulate in %g1 with the last one writing q.
   The remainder is then read from %y into r and, if the "bl" condition
   holds, d is added back to it in the annulled delay slot.

   Both outputs are earlyclobber ("=&r"): q is written by the final divscc
   and r by the rd, yet operand 4 (d) is still read afterwards by
   "add %1,%4,%1".  Without '&' the compiler could place d in the same
   register as q or r and the correction step would use a destroyed
   divisor.  %g1 and the condition codes are declared clobbered.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n"                                     \
"       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
"       tst     %%g0\n"                                                 \
"       divscc  %3,%4,%%g1\n"                                           \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%%g1\n"                                         \
"       divscc  %%g1,%4,%0\n"                                           \
"       rd      %%y,%1\n"                                               \
"       bl,a 1f\n"                                                      \
"       add     %1,%4,%1\n"                                             \
"1:     ! End of inline udiv_qrnnd"                                     \
           : "=&r" ((USItype) (q)),                                     \
             "=&r" ((USItype) (r))                                      \
           : "r" ((USItype) (n1)),                                      \
             "r" ((USItype) (n0)),                                      \
             "rI" ((USItype) (d))                                       \
           : "g1" __AND_CLOBBER_CC)
1162 #define UDIV_TIME 37
/* count_leading_zeros for SPARClite: executes "scan x,1" and stores the
   result in count.  The existing comment after this macro explains why
   COUNT_LEADING_ZEROS_0 is left undefined for this target.  */
1163 #define count_leading_zeros(count, x) \
1164   do {                                                                  \
1165   __asm__ ("scan %1,1,%0"                                               \
1166            : "=r" ((USItype) (count))                                   \
1167            : "r" ((USItype) (x)));                                      \
1168   } while (0)
1169 /* Early sparclites return 63 for an argument of 0, but they warn that future
1170    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1171    undefined.  */
1172 #else
1173 /* SPARC without integer multiplication and divide instructions.
1174    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm for SPARC without a hardware multiply: u is written to %y,
   %o5 is set to (v >> 31, arithmetic) & u, and %g1 is cleared together
   with the condition codes.  32 mulscc steps against v follow, then one
   final mulscc against 0.  w1 receives %g1 + %o5 and w0 receives %y.
   %g1, %o5 and the condition codes are declared clobbered.
   NOTE(review): the %o5 term presumably corrects the signed multiply-step
   result to an unsigned product -- confirm.  The "Don't move this insn"
   remarks in the asm text indicate the first instructions are
   order-sensitive after the write to %y.  */
1175 #define umul_ppmm(w1, w0, u, v) \
1176   __asm__ ("! Inlined umul_ppmm\n"                                      \
1177 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1178 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1179 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1180 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1181 "       mulscc  %%g1,%3,%%g1\n"                                         \
1182 "       mulscc  %%g1,%3,%%g1\n"                                         \
1183 "       mulscc  %%g1,%3,%%g1\n"                                         \
1184 "       mulscc  %%g1,%3,%%g1\n"                                         \
1185 "       mulscc  %%g1,%3,%%g1\n"                                         \
1186 "       mulscc  %%g1,%3,%%g1\n"                                         \
1187 "       mulscc  %%g1,%3,%%g1\n"                                         \
1188 "       mulscc  %%g1,%3,%%g1\n"                                         \
1189 "       mulscc  %%g1,%3,%%g1\n"                                         \
1190 "       mulscc  %%g1,%3,%%g1\n"                                         \
1191 "       mulscc  %%g1,%3,%%g1\n"                                         \
1192 "       mulscc  %%g1,%3,%%g1\n"                                         \
1193 "       mulscc  %%g1,%3,%%g1\n"                                         \
1194 "       mulscc  %%g1,%3,%%g1\n"                                         \
1195 "       mulscc  %%g1,%3,%%g1\n"                                         \
1196 "       mulscc  %%g1,%3,%%g1\n"                                         \
1197 "       mulscc  %%g1,%3,%%g1\n"                                         \
1198 "       mulscc  %%g1,%3,%%g1\n"                                         \
1199 "       mulscc  %%g1,%3,%%g1\n"                                         \
1200 "       mulscc  %%g1,%3,%%g1\n"                                         \
1201 "       mulscc  %%g1,%3,%%g1\n"                                         \
1202 "       mulscc  %%g1,%3,%%g1\n"                                         \
1203 "       mulscc  %%g1,%3,%%g1\n"                                         \
1204 "       mulscc  %%g1,%3,%%g1\n"                                         \
1205 "       mulscc  %%g1,%3,%%g1\n"                                         \
1206 "       mulscc  %%g1,%3,%%g1\n"                                         \
1207 "       mulscc  %%g1,%3,%%g1\n"                                         \
1208 "       mulscc  %%g1,%3,%%g1\n"                                         \
1209 "       mulscc  %%g1,%3,%%g1\n"                                         \
1210 "       mulscc  %%g1,%3,%%g1\n"                                         \
1211 "       mulscc  %%g1,%3,%%g1\n"                                         \
1212 "       mulscc  %%g1,%3,%%g1\n"                                         \
1213 "       mulscc  %%g1,0,%%g1\n"                                          \
1214 "       add     %%g1,%%o5,%0\n"                                         \
1215 "       rd      %%y,%1"                                                 \
1216            : "=r" ((USItype) (w1)),                                     \
1217              "=r" ((USItype) (w0))                                      \
1218            : "%rI" ((USItype) (u)),                                     \
1219              "r" ((USItype) (v))                                                \
1220            : "g1", "o5" __AND_CLOBBER_CC)
1221 #define UMUL_TIME 39            /* 39 instructions */
1222 /* It's quite necessary to add this much assembler for the sparc.
1223    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd for SPARC without hardware divide: a software loop.
   On entry operand 0 (__q) holds __n0 and operand 1 (__r) holds __n1,
   via the matching constraints "0" and "1"; operand 2 is the divisor.
   %g1 is loaded with 32 and decremented once per iteration.  Each step
   compares %1 with the divisor, shifts %0 left by one with addxcc/addcc,
   and subtracts the divisor from %1 on the paths marked in the asm text.
   The final xnor with 0 inverts every bit of %0 before it is returned as
   the quotient; %1 is returned as the remainder.
   Both outputs are earlyclobber; %g1 and the condition codes are
   declared clobbered.  The indented instructions after each branch are
   presumably delay-slot instructions -- confirm.  */
1224 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1225   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1226 "       mov     32,%%g1\n"                                              \
1227 "       subcc   %1,%2,%%g0\n"                                           \
1228 "1:     bcs     5f\n"                                                   \
1229 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1230 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1231 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1232 "       subcc   %%g1,1,%%g1\n"                                          \
1233 "2:     bne     1b\n"                                                   \
1234 "        subcc  %1,%2,%%g0\n"                                           \
1235 "       bcs     3f\n"                                                   \
1236 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1237 "       b       3f\n"                                                   \
1238 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1239 "4:     sub     %1,%2,%1\n"                                             \
1240 "5:     addxcc  %1,%1,%1\n"                                             \
1241 "       bcc     2b\n"                                                   \
1242 "        subcc  %%g1,1,%%g1\n"                                          \
1243 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1244 "       bne     4b\n"                                                   \
1245 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1246 "       sub     %1,%2,%1\n"                                             \
1247 "3:     xnor    %0,0,%0\n"                                              \
1248 "       ! End of inline udiv_qrnnd"                                     \
1249            : "=&r" ((USItype) (__q)),                                   \
1250              "=&r" ((USItype) (__r))                                    \
1251            : "r" ((USItype) (__d)),                                     \
1252              "1" ((USItype) (__n1)),                                    \
1253              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1254 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1255 #endif /* __sparclite__ */
1256 #endif /* __sparc_v8__ */
1257 #endif /* sparc32 */
1258
1259 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1260     && W_TYPE_SIZE == 64
/* add_ssaaaa for 64-bit SPARC: two-word addition
   (sh,sl) = (ah,al) + (bh,bl) on UDItype operands.  addcc adds the low
   words into sl, a plain add forms ah + bh in sh, and the annulled
   branch on the %xcc carry condition executes "add %0, 1, %0" to
   propagate the carry into sh.  sl is earlyclobber because it is written
   before the high-word operands are read.  */
1261 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1262   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1263            "add %r2,%3,%0\n\t"                                          \
1264            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1265            "add %0, 1, %0\n"                                            \
1266            "1:"                                                         \
1267            : "=r" ((UDItype)(sh)),                                      \
1268              "=&r" ((UDItype)(sl))                                      \
1269            : "%rJ" ((UDItype)(ah)),                                     \
1270              "rI" ((UDItype)(bh)),                                      \
1271              "%rJ" ((UDItype)(al)),                                     \
1272              "rI" ((UDItype)(bl))                                       \
1273            __CLOBBER_CC)
1274
/* sub_ddmmss for 64-bit SPARC: two-word subtraction
   (sh,sl) = (ah,al) - (bh,bl) on UDItype operands.  subcc subtracts the
   low words into sl, a plain sub forms ah - bh in sh, and the annulled
   branch on the %xcc carry condition executes "sub %0, 1, %0" to
   propagate the borrow into sh.  sl is earlyclobber because it is
   written before the high-word operands are read.  */
1275 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1276   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1277            "sub %r2,%3,%0\n\t"                                          \
1278            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1279            "sub %0, 1, %0\n\t"                                          \
1280            "1:"                                                         \
1281            : "=r" ((UDItype)(sh)),                                      \
1282              "=&r" ((UDItype)(sl))                                      \
1283            : "rJ" ((UDItype)(ah)),                                      \
1284              "rI" ((UDItype)(bh)),                                      \
1285              "rJ" ((UDItype)(al)),                                      \
1286              "rI" ((UDItype)(bl))                                       \
1287            __CLOBBER_CC)
1288
/* umul_ppmm for 64-bit SPARC: forms the two-word product of u and v in
   (wh,wl) from mulx operations on 32-bit halves of the operands.  The
   halves are extracted with "srl x,0" and "srlx x,32"; partial products
   are combined with shifts and addcc, and a carry term built with
   sethi/add/movcc is folded into wh by the final add.  Four UDItype
   temporaries serve as scratch registers; every output except wh is
   earlyclobber, and wh is written only by the last instruction.
   The asm is marked __volatile__.
   NOTE(review): the exact carry-handling scheme is intricate; verify
   against the SPARC v9 manual before modifying any instruction.  */
1289 #define umul_ppmm(wh, wl, u, v)                                         \
1290   do {                                                                  \
1291           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1292           __asm__ __volatile__ (                                        \
1293                    "srl %7,0,%3\n\t"                                    \
1294                    "mulx %3,%6,%1\n\t"                                  \
1295                    "srlx %6,32,%2\n\t"                                  \
1296                    "mulx %2,%3,%4\n\t"                                  \
1297                    "sllx %4,32,%5\n\t"                                  \
1298                    "srl %6,0,%3\n\t"                                    \
1299                    "sub %1,%5,%5\n\t"                                   \
1300                    "srlx %5,32,%5\n\t"                                  \
1301                    "addcc %4,%5,%4\n\t"                                 \
1302                    "srlx %7,32,%5\n\t"                                  \
1303                    "mulx %3,%5,%3\n\t"                                  \
1304                    "mulx %2,%5,%5\n\t"                                  \
1305                    "sethi %%hi(0x80000000),%2\n\t"                      \
1306                    "addcc %4,%3,%4\n\t"                                 \
1307                    "srlx %4,32,%4\n\t"                                  \
1308                    "add %2,%2,%2\n\t"                                   \
1309                    "movcc %%xcc,%%g0,%2\n\t"                            \
1310                    "addcc %5,%4,%5\n\t"                                 \
1311                    "sllx %3,32,%3\n\t"                                  \
1312                    "add %1,%3,%1\n\t"                                   \
1313                    "add %5,%2,%0"                                       \
1314            : "=r" ((UDItype)(wh)),                                      \
1315              "=&r" ((UDItype)(wl)),                                     \
1316              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1317            : "r" ((UDItype)(u)),                                        \
1318              "r" ((UDItype)(v))                                         \
1319            __CLOBBER_CC);                                               \
1320   } while (0)
1321 #define UMUL_TIME 96
1322 #define UDIV_TIME 230
1323 #endif /* sparc64 */
1324
1325 #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa for VAX: two-word addition (sh,sl) = (ah,al) + (bh,bl).
   ah and al are tied to the outputs (constraints "%0"/"%1", commutative
   with the following operand); addl2 adds bl into sl and adwc then adds
   bh into sh.  sl is earlyclobber.
   NOTE(review): adwc presumably adds the carry produced by addl2.  */
1326 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1327   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1328            : "=g" ((USItype) (sh)),                                     \
1329              "=&g" ((USItype) (sl))                                     \
1330            : "%0" ((USItype) (ah)),                                     \
1331              "g" ((USItype) (bh)),                                      \
1332              "%1" ((USItype) (al)),                                     \
1333              "g" ((USItype) (bl)))
/* sub_ddmmss for VAX: two-word subtraction (sh,sl) = (ah,al) - (bh,bl).
   ah and al are tied to the outputs (constraints "0"/"1"); subl2
   subtracts bl from sl and sbwc then subtracts bh from sh.
   sl is earlyclobber.
   NOTE(review): sbwc presumably subtracts the borrow left by subl2.  */
1334 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1335   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1336            : "=g" ((USItype) (sh)),                                     \
1337              "=&g" ((USItype) (sl))                                     \
1338            : "0" ((USItype) (ah)),                                      \
1339              "g" ((USItype) (bh)),                                      \
1340              "1" ((USItype) (al)),                                      \
1341              "g" ((USItype) (bl)))
/* umul_ppmm for VAX: m0 and m1 are each evaluated once into USItype
   locals.  emul writes a double-word result into a union, whose __h and
   __l halves are copied to xh and xl.  xh is then adjusted by adding
   __m1 when the top bit of __m0 is set and __m0 when the top bit of __m1
   is set; the arithmetic shift by 31 builds the all-ones/zero masks.
   NOTE(review): the adjustment presumably converts a signed emul result
   into the unsigned high word -- confirm against the VAX architecture
   reference.  */
1342 #define umul_ppmm(xh, xl, m0, m1) \
1343   do {                                                                  \
1344     union {                                                             \
1345         UDItype __ll;                                                   \
1346         struct {USItype __l, __h;} __i;                                 \
1347       } __xx;                                                           \
1348     USItype __m0 = (m0), __m1 = (m1);                                   \
1349     __asm__ ("emul %1,%2,$0,%0"                                         \
1350              : "=r" (__xx.__ll)                                         \
1351              : "g" (__m0),                                              \
1352                "g" (__m1));                                             \
1353     (xh) = __xx.__i.__h;                                                \
1354     (xl) = __xx.__i.__l;                                                \
1355     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1356              + (((SItype) __m1 >> 31) & __m0));                         \
1357   } while (0)
/* sdiv_qrnnd for VAX: n1 and n0 are stored into the high and low halves
   of a signed double-word union, and ediv is executed with that value
   and d, writing q and r.
   NOTE(review): q and r presumably receive the signed quotient and
   remainder -- confirm against the VAX architecture reference.
   n1 and n0 are used unparenthesized in plain assignments.  */
1358 #define sdiv_qrnnd(q, r, n1, n0, d) \
1359   do {                                                                  \
1360     union {DItype __ll;                                                 \
1361            struct {SItype __l, __h;} __i;                               \
1362           } __xx;                                                       \
1363     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1364     __asm__ ("ediv %3,%2,%0,%1"                                         \
1365              : "=g" (q), "=g" (r)                                       \
1366              : "g" (__xx.__ll), "g" (d));                               \
1367   } while (0)
1368 #endif /* __vax__ */
1369
1370 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1371 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1372    to expand builtin functions depending on what configuration features
1373    are available.  This avoids library calls when the operation can be
1374    performed in-line.  */
/* Xtensa umul_ppmm: let the compiler expand __builtin_umulsidi3 for the
   double-word product of u and v, then split it through a DWunion into
   its high word (w1) and low word (w0).  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    DWunion __dprod;            /* double-word product */               \
    __dprod.ll = __builtin_umulsidi3 (u, v);                            \
    w1 = __dprod.s.high;                                                \
    w0 = __dprod.s.low;                                                 \
  } while (0)
/* Xtensa: the remaining primitives map directly onto compiler builtins.
   __umulsidi3 forwards to __builtin_umulsidi3; count_leading_zeros and
   count_trailing_zeros assign the result of __builtin_clz / __builtin_ctz
   on X to COUNT.  */
1382 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1383 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1384 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1385 #endif /* __xtensa__ */
1386
1387 #if defined xstormy16
1388 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* count_leading_zeros for xstormy16: scans x in 16-bit pieces from the
   most significant end, adding __clzhi2 of each piece to count and
   stopping at the first piece that is not all zeros (result != 16).
   For x == 0 every piece contributes 16, so count ends up as W_TYPE_SIZE.

   The locals use reserved-style names (__size, __c) so that they cannot
   shadow a caller argument that happens to be called `size' or `c'.
   Both count and x are evaluated more than once.
   NOTE(review): the helper called here is __clzhi2, while the extern
   declaration just above this macro names __stormy16_count_leading_zeros
   -- confirm which is intended.  */
#define count_leading_zeros(count, x)                                   \
  do                                                                    \
    {                                                                   \
      UHItype __size;                                                   \
                                                                        \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */          \
      for ((count) = 0, __size = W_TYPE_SIZE; __size; __size -= 16)     \
        {                                                               \
          UHItype __c;                                                  \
                                                                        \
          __c = __clzhi2 ((x) >> (__size - 16));                        \
          (count) += __c;                                               \
          if (__c != 16)                                                \
            break;                                                      \
        }                                                               \
    }                                                                   \
  while (0)
1406 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1407 #endif
1408
1409 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa for Z8000 (16-bit words): two-word addition
   (sh,sl) = (ah,al) + (bh,bl).  ah and al are tied to the outputs
   (constraints "%0"/"%1"); add adds bl into sl and adc then adds bh into
   sh.  sl is earlyclobber.
   NOTE(review): adc presumably includes the carry left by add.  */
1410 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1411   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1412            : "=r" ((unsigned int)(sh)),                                 \
1413              "=&r" ((unsigned int)(sl))                                 \
1414            : "%0" ((unsigned int)(ah)),                                 \
1415              "r" ((unsigned int)(bh)),                                  \
1416              "%1" ((unsigned int)(al)),                                 \
1417              "rQR" ((unsigned int)(bl)))
/* sub_ddmmss for Z8000 (16-bit words): two-word subtraction
   (sh,sl) = (ah,al) - (bh,bl).  ah and al are tied to the outputs
   (constraints "0"/"1"); sub subtracts bl from sl and sbc then subtracts
   bh from sh.  sl is earlyclobber.
   NOTE(review): sbc presumably includes the borrow left by sub.  */
1418 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1419   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1420            : "=r" ((unsigned int)(sh)),                                 \
1421              "=&r" ((unsigned int)(sl))                                 \
1422            : "0" ((unsigned int)(ah)),                                  \
1423              "r" ((unsigned int)(bh)),                                  \
1424              "1" ((unsigned int)(al)),                                  \
1425              "rQR" ((unsigned int)(bl)))
/* umul_ppmm for Z8000 (16-bit words): m0 and m1 are each evaluated once
   into unsigned int locals.  mult is executed with __m0 tied to the __l
   half of a union and __m1 as the other operand; the __h and __l halves
   are then copied to xh and xl.  xh is adjusted by adding __m1 when the
   top bit of __m0 is set and __m0 when the top bit of __m1 is set (the
   arithmetic shift by 15 builds the masks).
   NOTE(review): the adjustment presumably converts a signed mult result
   into the unsigned high word -- confirm against the Z8000 manual.  */
1426 #define umul_ppmm(xh, xl, m0, m1) \
1427   do {                                                                  \
1428     union {long int __ll;                                               \
1429            struct {unsigned int __h, __l;} __i;                         \
1430           } __xx;                                                       \
1431     unsigned int __m0 = (m0), __m1 = (m1);                              \
1432     __asm__ ("mult      %S0,%H3"                                        \
1433              : "=r" (__xx.__i.__h),                                     \
1434                "=r" (__xx.__i.__l)                                      \
1435              : "%1" (__m0),                                             \
1436                "rQR" (__m1));                                           \
1437     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1438     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1439              + (((signed int) __m1 >> 15) & __m0));                     \
1440   } while (0)
1441 #endif /* __z8000__ */
1442
1443 #endif /* __GNUC__ */
1444
1445 /* If this machine has no inline assembler, use C macros.  */
1446
1447 #if !defined (add_ssaaaa)
/* Portable add_ssaaaa: two-word addition (sh,sl) = (ah,al) + (bh,bl).
   The low words are summed first; a wrapped sum is smaller than al,
   which yields the carry added into the high word.  sh is stored before
   sl, and al is evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __lo_sum = (al) + (bl);                                      \
    (sh) = (ah) + (bh) + (__lo_sum < (al));     /* carry out of low */  \
    (sl) = __lo_sum;                                                    \
  } while (0)
1455 #endif
1456
1457 #if !defined (sub_ddmmss)
/* Portable sub_ddmmss: two-word subtraction (sh,sl) = (ah,al) - (bh,bl).
   The low words are subtracted first; a wrapped difference is larger
   than al, which yields the borrow taken from the high word.  sh is
   stored before sl, and al is evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __lo_diff = (al) - (bl);                                     \
    (sh) = (ah) - (bh) - (__lo_diff > (al));    /* borrow from high */  \
    (sl) = __lo_diff;                                                   \
  } while (0)
1465 #endif
1466
1467 /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1468    smul_ppmm.  */
1469 #if !defined (umul_ppmm) && defined (smul_ppmm)
/* umul_ppmm built on smul_ppmm: u and v are each evaluated once, the
   signed product is taken, and the high word is corrected by adding v
   when the top bit of u is set and u when the top bit of v is set.  The
   negated top bit forms an all-ones or all-zeros mask.  w0 is written
   directly by smul_ppmm.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __s_hi;                                                      \
    UWtype __s_u = (u), __s_v = (v);                                    \
    smul_ppmm (__s_hi, w0, __s_u, __s_v);                               \
    (w1) = __s_hi                                                       \
           + (-(__s_u >> (W_TYPE_SIZE - 1)) & __s_v)                    \
           + (-(__s_v >> (W_TYPE_SIZE - 1)) & __s_u);                   \
  } while (0)
1478 #endif
1479
1480 /* If we still don't have umul_ppmm, define it using plain C.  */
1481 #if !defined (umul_ppmm)
/* Plain C umul_ppmm: schoolbook multiplication on half-word digits.
   u and v are split into low/high halves, the four partial products are
   formed in UWtype, and the two middle products are summed together with
   the high half of the low product.  A wrap in that middle sum is a
   carry worth __ll_B in the high word.  u and v are each evaluated
   twice.  */
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UHWtype __u_lo = __ll_lowpart (u);                                  \
    UHWtype __u_hi = __ll_highpart (u);                                 \
    UHWtype __v_lo = __ll_lowpart (v);                                  \
    UHWtype __v_hi = __ll_highpart (v);                                 \
                                                                        \
    UWtype __p_ll = (UWtype) __u_lo * __v_lo;                           \
    UWtype __p_lh = (UWtype) __u_lo * __v_hi;                           \
    UWtype __p_hl = (UWtype) __u_hi * __v_lo;                           \
    UWtype __p_hh = (UWtype) __u_hi * __v_hi;                           \
                                                                        \
    /* Adding the high half of the low product cannot carry...  */      \
    UWtype __mid = __p_lh + __ll_highpart (__p_ll);                     \
    /* ...but adding the second middle product can.  */                 \
    __mid += __p_hl;                                                    \
    if (__mid < __p_hl)                                                 \
      __p_hh += __ll_B;         /* carry into the high word */          \
                                                                        \
    (w1) = __p_hh + __ll_highpart (__mid);                              \
    (w0) = __ll_lowpart (__mid) * __ll_B + __ll_lowpart (__p_ll);       \
  } while (0)
1505 #endif
1506
1507 #if !defined (__umulsidi3)
/* Fallback __umulsidi3: a statement expression that runs umul_ppmm into
   the high and low words of a DWunion and yields the union's
   double-word member as its value.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __dw;                                                       \
    umul_ppmm (__dw.s.high, __dw.s.low, u, v);                          \
    __dw.ll;                                                            \
  })
1512 #endif
1513
1514 /* Define this unconditionally, so it can be used for debugging.  */
1515 #define __udiv_qrnnd_c(q, r, n1, n0, d) /* Q, R = N1:N0 divided by D */ \
1516   do { /* D, N1 and N0 are expanded more than once */                  \
1517     UWtype __d1, __d0, __q1, __q0; /* halves of D and of Q */           \
1518     UWtype __r1, __r0, __m; /* partial remainders, product */           \
1519     __d1 = __ll_highpart (d); /* used as divisor: must be nonzero */    \
1520     __d0 = __ll_lowpart (d);                                            \
1521     /* High half of Q: estimate with __d1, fix up with __d0.  */        \
1522     __r1 = (n1) % __d1;                                                 \
1523     __q1 = (n1) / __d1;                                                 \
1524     __m = (UWtype) __q1 * __d0;                                         \
1525     __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
1526     if (__r1 < __m) /* estimate too large */                            \
1527       {                                                                 \
1528         __q1--, __r1 += (d);                                            \
1529         if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1530           if (__r1 < __m)                                               \
1531             __q1--, __r1 += (d);                                        \
1532       }                                                                 \
1533     __r1 -= __m; /* remainder after the high half */                    \
1534     /* Low half of Q: same steps on __r1 and the low half of N0.  */    \
1535     __r0 = __r1 % __d1;                                                 \
1536     __q0 = __r1 / __d1;                                                 \
1537     __m = (UWtype) __q0 * __d0;                                         \
1538     __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
1539     if (__r0 < __m)                                                     \
1540       {                                                                 \
1541         __q0--, __r0 += (d);                                            \
1542         if (__r0 >= (d))                                                \
1543           if (__r0 < __m)                                               \
1544             __q0--, __r0 += (d);                                        \
1545       }                                                                 \
1546     __r0 -= __m;                                                        \
1547     /* Join the halves of Q; __r0 is the remainder.  */                 \
1548     (q) = (UWtype) __q1 * __ll_B | __q0;                                \
1549     (r) = __r0;                                                         \
1550   } while (0)
1551
1552 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1553    __udiv_w_sdiv (defined in libgcc or elsewhere): Q, R = NH:NL / D.  */
1554 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1555 #define udiv_qrnnd(q, r, nh, nl, d) \
1556   do {                                                                  \
1557     UWtype __r; /* a full word, like the other temporaries */           \
1558     (q) = __udiv_w_sdiv (&__r, nh, nl, d); /* R comes back via &__r */ \
1559     (r) = __r;                                                          \
1560   } while (0)
1561 #endif
1562
1563 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
1564 #if !defined (udiv_qrnnd) /* neither native nor via sdiv_qrnnd above */
1565 #define UDIV_NEEDS_NORMALIZATION 1 /* NOTE(review): presumably "callers must pre-normalize D" -- confirm */
1566 #define udiv_qrnnd __udiv_qrnnd_c /* plain alias: same five arguments */
1567 #endif
1568
1569 #if !defined (count_leading_zeros) /* Portable C fallback using __clz_tab.  */
1570 #define count_leading_zeros(count, x) \
1571   do {                                                                  \
1572     UWtype __xr = (x); /* X is evaluated exactly once */                \
1573     UWtype __a; /* shift isolating the top nonzero chunk */             \
1574     /* Find __a: by comparisons for narrow words, else a byte scan.  */ \
1575     if (W_TYPE_SIZE <= 32)                                              \
1576       {                                                                 \
1577         __a = __xr < ((UWtype)1<<2*__BITS4)                             \
1578           ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
1579           : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
1580       }                                                                 \
1581     else                                                                \
1582       {                                                                 \
1583         for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
1584           if (((__xr >> __a) & 0xff) != 0)                              \
1585             break;                                                      \
1586       }                                                                 \
1587     /* NOTE(review): __clz_tab presumably gives a byte's bit width.  */ \
1588     (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
1589   } while (0)
1590 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE /* NOTE(review): presumably the result for X == 0 -- confirm */
1591 #endif
1592
1593 #if !defined (count_trailing_zeros)
1594 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1595    defined in asm, but if it is not, the C version above is good enough.  */
1596 #define count_trailing_zeros(count, x) /* NOTE(review): X == 0 looks unsupported -- confirm */ \
1597   do {                                                                  \
1598     UWtype __ctz_x = (x); /* X is evaluated exactly once */             \
1599     UWtype __ctz_c; /* leading zeros of X's lowest set bit */           \
1600     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
1601     (count) = W_TYPE_SIZE - 1 - __ctz_c; /* index counted from bit 0 */ \
1602   } while (0)
1603 #endif
1604
1605 #ifndef UDIV_NEEDS_NORMALIZATION /* not set by anything above */
1606 #define UDIV_NEEDS_NORMALIZATION 0
1607 #endif