libgcc/longlong.h

   1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   2    Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   4    Free Software Foundation, Inc.
   5
   6    This file is part of the GNU C Library.
   7
   8    The GNU C Library is free software; you can redistribute it and/or
   9    modify it under the terms of the GNU Lesser General Public
  10    License as published by the Free Software Foundation; either
  11    version 2.1 of the License, or (at your option) any later version.
  12
  13    In addition to the permissions in the GNU Lesser General Public
  14    License, the Free Software Foundation gives you unlimited
  15    permission to link the compiled version of this file into
  16    combinations with other programs, and to distribute those
  17    combinations without any restriction coming from the use of this
  18    file.  (The Lesser General Public License restrictions do apply in
  19    other respects; for example, they cover modification of the file,
  20    and distribution when not linked into a combine executable.)
  21
  22    The GNU C Library is distributed in the hope that it will be useful,
  23    but WITHOUT ANY WARRANTY; without even the implied warranty of
  24    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25    Lesser General Public License for more details.
  26
  27    You should have received a copy of the GNU Lesser General Public
  28    License along with the GNU C Library; if not, write to the Free
  29    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
  30    MA 02110-1301, USA.  */
  31
  32 /* You have to define the following before including this file:
  33
  34    UWtype -- An unsigned type, default type for operations (typically a "word")
  35    UHWtype -- An unsigned type, at least half the size of UWtype.
   36    UDWtype -- An unsigned type, at least twice as large as UWtype
  37    W_TYPE_SIZE -- size in bits of UWtype
  38
  39    UQItype -- Unsigned 8 bit type.
  40    SItype, USItype -- Signed and unsigned 32 bit types.
  41    DItype, UDItype -- Signed and unsigned 64 bit types.
  42
  43    On a 32 bit machine UWtype should typically be USItype;
  44    on a 64 bit machine, UWtype should typically be UDItype.  */
  45
   46 #define __BITS4 (W_TYPE_SIZE / 4)
      /* __BITS4 (above) is a quarter of the word width, in bits.
         __ll_B is 1 << (W_TYPE_SIZE / 2) computed in UWtype.
         __ll_lowpart (t) converts T to UWtype and keeps the bits below
         __ll_B; __ll_highpart (t) converts T to UWtype and shifts it right
         by W_TYPE_SIZE / 2.
         These are macros, so W_TYPE_SIZE and UWtype only have to be defined
         at the point of use, not here.  */
   47 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
   48 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
   49 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
  50
   51 #ifndef W_TYPE_SIZE
      /* The includer did not define W_TYPE_SIZE: default to a 32-bit word,
         with USItype as both the word and the half-word type and UDItype as
         the double-word type.  USItype and UDItype themselves must still be
         provided by the includer.  */
   52 #define W_TYPE_SIZE     32
   53 #define UWtype          USItype
   54 #define UHWtype         USItype
   55 #define UDWtype         UDItype
   56 #endif
  57
  58 /* Used in glibc only.  */
   59 #ifndef attribute_hidden
      /* If the includer did not supply attribute_hidden, make it expand to
         nothing so the declaration below is still valid.  */
   60 #define attribute_hidden
   61 #endif
   62
      /* 256-entry byte-indexed lookup table, defined elsewhere.
         NOTE(review): from its use in the Alpha count_leading_zeros below it
         presumably maps a byte value to the position of its highest set bit
         plus one -- confirm against the table's definition.  */
   63 extern const UQItype __clz_tab[256] attribute_hidden;
  64
  65 /* Define auxiliary asm macros.
  66
  67    1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
  68    UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
  69    word product in HIGH_PROD and LOW_PROD.
  70
  71    2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
  72    UDWtype product.  This is just a variant of umul_ppmm.
  73
  74    3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  75    denominator) divides a UDWtype, composed by the UWtype integers
  76    HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
  77    in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
  78    than DENOMINATOR for correct operation.  If, in addition, the most
  79    significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  80    UDIV_NEEDS_NORMALIZATION is defined to 1.
  81
  82    4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
  83    denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
  84    is rounded towards 0.
  85
  86    5) count_leading_zeros(count, x) counts the number of zero-bits from the
  87    msb to the first nonzero bit in the UWtype X.  This is the number of
  88    steps X needs to be shifted left to set the msb.  Undefined for X == 0,
  89    unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
  90
  91    6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
  92    from the least significant end.
  93
  94    7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   95    high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
  96    HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
  97    respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  98    (i.e. carry out) is not stored anywhere, and is lost.
  99
 100    8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 101    high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
  102    composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
  103    LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
 104    and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
 105    and is lost.
 106
 107    If any of these macros are left undefined for a particular CPU,
 108    C macros are used.  */
 109
 110 /* The CPUs come in alphabetical order below.
 111
 112    Please add support for more CPUs here, or improve the current support
 113    for the CPUs below!
 114    (E.g. WE32100, IBM360.)  */
 115
 116 #if defined (__GNUC__) && !defined (NO_ASM)
 117
 118 /* We sometimes need to clobber "cc" with gcc2, but that would not be
 119    understood by gcc1.  Use cpp to avoid major code duplication.  */
  120 #if __GNUC__ < 2
      /* __CLOBBER_CC is placed after the last input operand of an asm and
         opens a clobber list holding "cc"; __AND_CLOBBER_CC is placed after
         an existing clobber and appends "cc" to that list.  For __GNUC__ < 2
         both expand to nothing.  */
  121 #define __CLOBBER_CC
  122 #define __AND_CLOBBER_CC
  123 #else /* __GNUC__ >= 2 */
  124 #define __CLOBBER_CC : "cc"
  125 #define __AND_CLOBBER_CC , "cc"
  126 #endif /* __GNUC__ < 2 */
 127
  128 #if defined (__alpha) && W_TYPE_SIZE == 64
      /* Alpha with 64-bit words.

         umul_ppmm copies both operands into UDItype temporaries (so each
         argument is evaluated once), takes the high word of the product from
         __builtin_alpha_umulh and the low word from an ordinary
         multiplication.  */
  129 #define umul_ppmm(ph, pl, m0, m1) \
  130   do {                                                                  \
  131     UDItype __m0 = (m0), __m1 = (m1);                                   \
  132     (ph) = __builtin_alpha_umulh (__m0, __m1);                          \
  133     (pl) = __m0 * __m1;                                                 \
  134   } while (0)
  135 #define UMUL_TIME 46
  136 #ifndef LONGLONG_STANDALONE
      /* udiv_qrnnd defers to the out-of-line __udiv_qrnnd declared below:
         its return value becomes the quotient, and the value it stores
         through its first (pointer) argument becomes the remainder.  Not
         provided when LONGLONG_STANDALONE is defined.  */
  137 #define udiv_qrnnd(q, r, n1, n0, d) \
  138   do { UDItype __r;                                                     \
  139     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                         \
  140     (r) = __r;                                                          \
  141   } while (0)
  142 extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
  143 #define UDIV_TIME 220
  144 #endif /* LONGLONG_STANDALONE */
  145 #ifdef __alpha_cix__
      /* With __alpha_cix__ defined, use the compiler builtins directly and
         advertise 64 as the count_leading_zeros result for a zero input.  */
  146 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clzl (X))
  147 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctzl (X))
  148 #define COUNT_LEADING_ZEROS_0 64
  149 #else
      /* Without __alpha_cix__: __builtin_alpha_cmpbge (0, x) produces a
         small mask (used here as an 8-bit value).  count_leading_zeros turns
         that mask, inverted, into a byte index through __clz_tab, extracts
         that byte with __builtin_alpha_extbl, and finishes with a second
         __clz_tab lookup on the byte.  */
  150 #define count_leading_zeros(COUNT,X) \
  151   do {                                                                  \
  152     UDItype __xr = (X), __t, __a;                                       \
  153     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  154     __a = __clz_tab[__t ^ 0xff] - 1;                                    \
  155     __t = __builtin_alpha_extbl (__xr, __a);                            \
  156     (COUNT) = 64 - (__clz_tab[__t] + __a*8);                            \
  157   } while (0)
      /* count_trailing_zeros isolates the lowest set bit of the inverted
         cmpbge mask (~t & -~t), converts that one-hot value to a byte index
         with the 0xCC / 0xF0 / 0xAA tests, extracts that byte, multiplies
         the index by 8 (<<= 3), and then repeats the same one-hot-to-index
         step on the lowest set bit of the byte.  */
  158 #define count_trailing_zeros(COUNT,X) \
  159   do {                                                                  \
  160     UDItype __xr = (X), __t, __a;                                       \
  161     __t = __builtin_alpha_cmpbge (0, __xr);                             \
  162     __t = ~__t & -~__t;                                                 \
  163     __a = ((__t & 0xCC) != 0) * 2;                                      \
  164     __a += ((__t & 0xF0) != 0) * 4;                                     \
  165     __a += ((__t & 0xAA) != 0);                                         \
  166     __t = __builtin_alpha_extbl (__xr, __a);                            \
  167     __a <<= 3;                                                          \
  168     __t &= -__t;                                                        \
  169     __a += ((__t & 0xCC) != 0) * 2;                                     \
  170     __a += ((__t & 0xF0) != 0) * 4;                                     \
  171     __a += ((__t & 0xAA) != 0);                                         \
  172     (COUNT) = __a;                                                      \
  173   } while (0)
  174 #endif /* __alpha_cix__ */
  175 #endif /* __alpha */
 176
 177 #if defined (__arc__) && W_TYPE_SIZE == 32
 178 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
 179   __asm__ ("add.f       %1, %4, %5\n\tadc       %0, %2, %3"             \
 180            : "=r" ((USItype) (sh)),                                     \
 181              "=&r" ((USItype) (sl))                                     \
 182            : "%r" ((USItype) (ah)),                                     \
 183              "rIJ" ((USItype) (bh)),                                    \
 184              "%r" ((USItype) (al)),                                     \
 185              "rIJ" ((USItype) (bl)))
 186 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
 187   __asm__ ("sub.f       %1, %4, %5\n\tsbc       %0, %2, %3"             \
 188            : "=r" ((USItype) (sh)),                                     \
 189              "=&r" ((USItype) (sl))                                     \
 190            : "r" ((USItype) (ah)),                                      \
 191              "rIJ" ((USItype) (bh)),                                    \
 192              "r" ((USItype) (al)),                                      \
 193              "rIJ" ((USItype) (bl)))
 194 /* Call libgcc routine.  */
 195 #define umul_ppmm(w1, w0, u, v) \
 196 do {                                                                    \
 197   DWunion __w;                                                          \
 198   __w.ll = __umulsidi3 (u, v);                                          \
 199   w1 = __w.s.high;                                                      \
 200   w0 = __w.s.low;                                                       \
 201 } while (0)
 202 #define __umulsidi3 __umulsidi3
 203 UDItype __umulsidi3 (USItype, USItype);
 204 #endif
 205
  206 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
  207  && W_TYPE_SIZE == 32
      /* ARM with 32-bit words, excluding the case where __thumb__ is defined
         without __thumb2__.

         add_ssaaaa / sub_ddmmss: adds / subs combines the low words
         (%4, %5 -> %1), adc / sbc combines the high words (%2, %3 -> %0).
         The low result is an earlyclobber output since it is written before
         the high inputs are read, and the flags are declared clobbered
         through __CLOBBER_CC.  */
  208 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  209   __asm__ ("adds        %1, %4, %5\n\tadc       %0, %2, %3"             \
  210            : "=r" ((USItype) (sh)),                                     \
  211              "=&r" ((USItype) (sl))                                     \
  212            : "%r" ((USItype) (ah)),                                     \
  213              "rI" ((USItype) (bh)),                                     \
  214              "%r" ((USItype) (al)),                                     \
  215              "rI" ((USItype) (bl)) __CLOBBER_CC)
  216 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  217   __asm__ ("subs        %1, %4, %5\n\tsbc       %0, %2, %3"             \
  218            : "=r" ((USItype) (sh)),                                     \
  219              "=&r" ((USItype) (sl))                                     \
  220            : "r" ((USItype) (ah)),                                      \
  221              "rI" ((USItype) (bh)),                                     \
  222              "r" ((USItype) (al)),                                      \
  223              "rI" ((USItype) (bl)) __CLOBBER_CC)
  224 # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
  225      || defined(__ARM_ARCH_3__)
      /* For the architecture levels tested above, umul_ppmm is open-coded:
         each operand is split into 16-bit halves (mov ... lsr #16 for the
         high half, bic for the low half), the four partial products are
         formed with mul, and they are recombined with adds / addcs / adc so
         that carries reach the high word.  __t0..__t2 are scratch
         registers.  */
  226 #  define umul_ppmm(xh, xl, a, b)                                       \
  227   do {                                                                  \
  228     register USItype __t0, __t1, __t2;                                  \
  229     __asm__ ("%@ Inlined umul_ppmm\n"                                   \
  230            "    mov     %2, %5, lsr #16\n"                              \
  231            "    mov     %0, %6, lsr #16\n"                              \
  232            "    bic     %3, %5, %2, lsl #16\n"                          \
  233            "    bic     %4, %6, %0, lsl #16\n"                          \
  234            "    mul     %1, %3, %4\n"                                   \
  235            "    mul     %4, %2, %4\n"                                   \
  236            "    mul     %3, %0, %3\n"                                   \
  237            "    mul     %0, %2, %0\n"                                   \
  238            "    adds    %3, %4, %3\n"                                   \
  239            "    addcs   %0, %0, #65536\n"                               \
  240            "    adds    %1, %1, %3, lsl #16\n"                          \
  241            "    adc     %0, %0, %3, lsr #16"                            \
  242            : "=&r" ((USItype) (xh)),                                    \
  243              "=r" ((USItype) (xl)),                                     \
  244              "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
  245            : "r" ((USItype) (a)),                                       \
  246              "r" ((USItype) (b)) __CLOBBER_CC );                        \
  247   } while (0)
  248 #  define UMUL_TIME 20
  249 # else
      /* Otherwise express the multiplication as a widening C product and
         split the 64-bit result: low 32 bits to xl, high 32 bits to xh.  */
  250 #  define umul_ppmm(xh, xl, a, b)                                       \
  251   do {                                                                  \
  252     /* Generate umull, under compiler control.  */                      \
  253     register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);       \
  254     (xl) = (USItype)__t0;                                               \
  255     (xh) = (USItype)(__t0 >> 32);                                       \
  256   } while (0)
  257 #  define UMUL_TIME 3
  258 # endif
  259 # define UDIV_TIME 100
  260 #endif /* __arm__ */
 261
  262 #if defined(__arm__)
      /* Applies to every __arm__ configuration; unlike the section above it
         is not restricted by __thumb__ or W_TYPE_SIZE.  */
  263 /* Let gcc decide how best to implement count_leading_zeros.  */
  264 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X))
  265 #define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
      /* Declares 32 as the count_leading_zeros result for X == 0 (see the
         description of COUNT_LEADING_ZEROS_0 near the top of this file).  */
  266 #define COUNT_LEADING_ZEROS_0 32
  267 #endif
 268
  269 #if defined (__AVR__)
      /* AVR: choose the clz / ctz builtin according to the configured word
         size -- the plain (int) builtins for 16 bits, the "l" (long) variants
         for 32 bits and the "ll" (long long) variants for 64 bits -- and set
         COUNT_LEADING_ZEROS_0 to that word size.  No macros are defined here
         for any other W_TYPE_SIZE.  */
  270
  271 #if W_TYPE_SIZE == 16
  272 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
  273 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
  274 #define COUNT_LEADING_ZEROS_0 16
  275 #endif /* W_TYPE_SIZE == 16 */
  276
  277 #if W_TYPE_SIZE == 32
  278 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
  279 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
  280 #define COUNT_LEADING_ZEROS_0 32
  281 #endif /* W_TYPE_SIZE == 32 */
  282
  283 #if W_TYPE_SIZE == 64
  284 #define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
  285 #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
  286 #define COUNT_LEADING_ZEROS_0 64
  287 #endif /* W_TYPE_SIZE == 64 */
  288
  289 #endif /* defined (__AVR__) */
 290
  291 #if defined (__CRIS__) && __CRIS_arch_version >= 3
      /* CRIS: count_leading_zeros maps to __builtin_clz from architecture
         version 3 on; count_trailing_zeros maps to __builtin_ctz only from
         version 8 on and is otherwise left undefined here.  */
  292 #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
  293 #if __CRIS_arch_version >= 8
  294 #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
  295 #endif
  296 #endif /* __CRIS__ */
 297
  298 #if defined (__hppa) && W_TYPE_SIZE == 32
      /* HP PA with 32-bit words.

         add_ssaaaa / sub_ddmmss: add / sub combines the low words
         (%4, %5 -> %1), then addc / subb combines the high words
         (%2, %3 -> %0).  The low result is an earlyclobber output because it
         is written before the high inputs are read.  */
  299 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  300   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"                              \
  301            : "=r" ((USItype) (sh)),                                     \
  302              "=&r" ((USItype) (sl))                                     \
  303            : "%rM" ((USItype) (ah)),                                    \
  304              "rM" ((USItype) (bh)),                                     \
  305              "%rM" ((USItype) (al)),                                    \
  306              "rM" ((USItype) (bl)))
  307 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  308   __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"                              \
  309            : "=r" ((USItype) (sh)),                                     \
  310              "=&r" ((USItype) (sl))                                     \
  311            : "rM" ((USItype) (ah)),                                     \
  312              "rM" ((USItype) (bh)),                                     \
  313              "rM" ((USItype) (al)),                                     \
  314              "rM" ((USItype) (bl)))
  315 #if defined (_PA_RISC1_1)
      /* With _PA_RISC1_1 defined, umul_ppmm uses xmpyu.  All three operands
         use the "x" constraint, and the UDItype result is read back through
         a union whose first USItype member supplies w1 (high word) and whose
         second supplies w0 (low word).  Without _PA_RISC1_1 no umul_ppmm is
         defined in this section.  */
  316 #define umul_ppmm(w1, w0, u, v) \
  317   do {                                                                  \
  318     union                                                               \
  319       {                                                                 \
  320         UDItype __f;                                                    \
  321         struct {USItype __w1, __w0;} __w1w0;                            \
  322       } __t;                                                            \
  323     __asm__ ("xmpyu %1,%2,%0"                                           \
  324              : "=x" (__t.__f)                                           \
  325              : "x" ((USItype) (u)),                                     \
  326                "x" ((USItype) (v)));                                    \
  327     (w1) = __t.__w1w0.__w1;                                             \
  328     (w0) = __t.__w1w0.__w0;                                             \
  329      } while (0)
  330 #define UMUL_TIME 8
  331 #else
  332 #define UMUL_TIME 30
  333 #endif
  334 #define UDIV_TIME 40
      /* count_leading_zeros: a binary search written with conditionally
         nullifying extru instructions.  The count (%0) starts at 1; at each
         step (16, 8, 4, 2 bits) either the upper field is non-zero and the
         working copy of x (%1, held in __tmp) is shifted down, or the step
         width is added to the count.  The last remaining bit is finally
         subtracted.  The per-instruction comments inside the asm text give
         the details.  */
  335 #define count_leading_zeros(count, x) \
  336   do {                                                                  \
  337     USItype __tmp;                                                      \
  338     __asm__ (                                                           \
  339        "ldi             1,%0\n"                                         \
  340 "       extru,=         %1,15,16,%%r0           ; Bits 31..16 zero?\n"  \
  341 "       extru,tr        %1,15,16,%1             ; No.  Shift down, skip add.\n"\
  342 "       ldo             16(%0),%0               ; Yes.  Perform add.\n" \
  343 "       extru,=         %1,23,8,%%r0            ; Bits 15..8 zero?\n"   \
  344 "       extru,tr        %1,23,8,%1              ; No.  Shift down, skip add.\n"\
  345 "       ldo             8(%0),%0                ; Yes.  Perform add.\n" \
  346 "       extru,=         %1,27,4,%%r0            ; Bits 7..4 zero?\n"    \
  347 "       extru,tr        %1,27,4,%1              ; No.  Shift down, skip add.\n"\
  348 "       ldo             4(%0),%0                ; Yes.  Perform add.\n" \
  349 "       extru,=         %1,29,2,%%r0            ; Bits 3..2 zero?\n"    \
  350 "       extru,tr        %1,29,2,%1              ; No.  Shift down, skip add.\n"\
  351 "       ldo             2(%0),%0                ; Yes.  Perform add.\n" \
  352 "       extru           %1,30,1,%1              ; Extract bit 1.\n"     \
  353 "       sub             %0,%1,%0                ; Subtract it.\n"       \
  354         : "=r" (count), "=r" (__tmp) : "1" (x));                        \
  355   } while (0)
  356 #endif
 357
  358 #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
      /* i370 / s390 / MVS with 32-bit words: signed multiply and divide
         only (smul_ppmm, sdiv_qrnnd).  */
  359 #if !defined (__zarch__)
      /* Without __zarch__, the DItype member of a union is the single asm
         operand; its halves are reached through the overlaid struct, whose
         first member __h is the high word and second member __l the low
         word.
         smul_ppmm: lr copies m0 into the register named by %N0, mr multiplies
         by m1; xh and xl are then read from __h and __l.
         sdiv_qrnnd: n1 / n0 are stored to __h / __l, dr divides by d; the
         quotient is read from __l and the remainder from __h.  */
  360 #define smul_ppmm(xh, xl, m0, m1) \
  361   do {                                                                  \
  362     union {DItype __ll;                                                 \
  363            struct {USItype __h, __l;} __i;                              \
  364           } __x;                                                        \
  365     __asm__ ("lr %N0,%1\n\tmr %0,%2"                                    \
  366              : "=&r" (__x.__ll)                                         \
  367              : "r" (m0), "r" (m1));                                     \
  368     (xh) = __x.__i.__h; (xl) = __x.__i.__l;                             \
  369   } while (0)
  370 #define sdiv_qrnnd(q, r, n1, n0, d) \
  371   do {                                                                  \
  372     union {DItype __ll;                                                 \
  373            struct {USItype __h, __l;} __i;                              \
  374           } __x;                                                        \
  375     __x.__i.__h = n1; __x.__i.__l = n0;                                 \
  376     __asm__ ("dr %0,%2"                                                 \
  377              : "=r" (__x.__ll)                                          \
  378              : "0" (__x.__ll), "r" (d));                                \
  379     (q) = __x.__i.__l; (r) = __x.__i.__h;                               \
  380   } while (0)
  381 #else
      /* With __zarch__, the operands are pinned to registers "0" and "1"
         with explicit register variables and the instruction names %r0
         directly.
         smul_ppmm: m0 is placed in register 1, mr multiplies by m1; xh is
         read from register 0 and xl from register 1.
         sdiv_qrnnd: n1 / n0 are placed in registers 0 / 1, dr divides by d;
         the quotient is read from register 1 and the remainder from
         register 0.  */
  382 #define smul_ppmm(xh, xl, m0, m1) \
  383   do {                                                                  \
  384     register SItype __r0 __asm__ ("0");                                 \
  385     register SItype __r1 __asm__ ("1") = (m0);                          \
  386                                                                         \
  387     __asm__ ("mr\t%%r0,%3"                                              \
  388              : "=r" (__r0), "=r" (__r1)                                 \
  389              : "r"  (__r1),  "r" (m1));                                 \
  390     (xh) = __r0; (xl) = __r1;                                           \
  391   } while (0)
  392
  393 #define sdiv_qrnnd(q, r, n1, n0, d) \
  394   do {                                                                  \
  395     register SItype __r0 __asm__ ("0") = (n1);                          \
  396     register SItype __r1 __asm__ ("1") = (n0);                          \
  397                                                                         \
  398     __asm__ ("dr\t%%r0,%4"                                              \
  399              : "=r" (__r0), "=r" (__r1)                                 \
  400              : "r" (__r0), "r" (__r1), "r" (d));                        \
  401     (q) = __r1; (r) = __r0;                                             \
  402   } while (0)
  403 #endif /* __zarch__ */
  404 #endif
 405
  406 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
      /* 80x86 with 32-bit words.  The asm templates use the "{a|b}" form so
         that one template serves both assembler dialects.

         add_ssaaaa / sub_ddmmss: ah and al are tied to the output registers
         ("0" / "1"), so add / sub accumulates bl into the low word and
         adc / sbb accumulates bh into the high word in place.  */
  407 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  408   __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"               \
  409            : "=r" ((USItype) (sh)),                                     \
  410              "=&r" ((USItype) (sl))                                     \
  411            : "%0" ((USItype) (ah)),                                     \
  412              "g" ((USItype) (bh)),                                      \
  413              "%1" ((USItype) (al)),                                     \
  414              "g" ((USItype) (bl)))
  415 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  416   __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"               \
  417            : "=r" ((USItype) (sh)),                                     \
  418              "=&r" ((USItype) (sl))                                     \
  419            : "0" ((USItype) (ah)),                                      \
  420              "g" ((USItype) (bh)),                                      \
  421              "1" ((USItype) (al)),                                      \
  422              "g" ((USItype) (bl)))
      /* umul_ppmm: u is tied to the "a" register, mul multiplies it by v; the
         low word is returned in "a" (w0) and the high word in "d" (w1).  */
  423 #define umul_ppmm(w1, w0, u, v) \
  424   __asm__ ("mul{l} %3"                                                  \
  425            : "=a" ((USItype) (w0)),                                     \
  426              "=d" ((USItype) (w1))                                      \
  427            : "%0" ((USItype) (u)),                                      \
  428              "rm" ((USItype) (v)))
      /* udiv_qrnnd: n0 is tied to "a" and n1 to "d", div divides by dv; the
         quotient is returned in "a" and the remainder in "d".  */
  429 #define udiv_qrnnd(q, r, n1, n0, dv) \
  430   __asm__ ("div{l} %4"                                                  \
  431            : "=a" ((USItype) (q)),                                      \
  432              "=d" ((USItype) (r))                                       \
  433            : "0" ((USItype) (n0)),                                      \
  434              "1" ((USItype) (n1)),                                      \
  435              "rm" ((USItype) (dv)))
      /* Bit counts come from the compiler builtins.  COUNT_LEADING_ZEROS_0
         is not defined in this section, so a zero input stays undefined.  */
  436 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
  437 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
  438 #define UMUL_TIME 40
  439 #define UDIV_TIME 40
  440 #endif /* 80x86 */
 441
  442 #if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
      /* x86 with 64-bit words: the same structure as the 32-bit 80x86
         macros, using the "q" instruction forms and UDItype operands.

         add_ssaaaa / sub_ddmmss: ah and al are tied to the output registers
         ("0" / "1"); add / sub accumulates bl into the low word and
         adc / sbb accumulates bh into the high word in place.  */
  443 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  444   __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
  445            : "=r" ((UDItype) (sh)),                                     \
  446              "=&r" ((UDItype) (sl))                                     \
  447            : "%0" ((UDItype) (ah)),                                     \
  448              "rme" ((UDItype) (bh)),                                    \
  449              "%1" ((UDItype) (al)),                                     \
  450              "rme" ((UDItype) (bl)))
  451 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  452   __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
  453            : "=r" ((UDItype) (sh)),                                     \
  454              "=&r" ((UDItype) (sl))                                     \
  455            : "0" ((UDItype) (ah)),                                      \
  456              "rme" ((UDItype) (bh)),                                    \
  457              "1" ((UDItype) (al)),                                      \
  458              "rme" ((UDItype) (bl)))
      /* umul_ppmm: u is tied to the "a" register, mul multiplies it by v; the
         low word is returned in "a" (w0) and the high word in "d" (w1).  */
  459 #define umul_ppmm(w1, w0, u, v) \
  460   __asm__ ("mul{q} %3"                                                  \
  461            : "=a" ((UDItype) (w0)),                                     \
  462              "=d" ((UDItype) (w1))                                      \
  463            : "%0" ((UDItype) (u)),                                      \
  464              "rm" ((UDItype) (v)))
      /* udiv_qrnnd: n0 is tied to "a" and n1 to "d", div divides by dv; the
         quotient is returned in "a" and the remainder in "d".  */
  465 #define udiv_qrnnd(q, r, n1, n0, dv) \
  466   __asm__ ("div{q} %4"                                                  \
  467            : "=a" ((UDItype) (q)),                                      \
  468              "=d" ((UDItype) (r))                                       \
  469            : "0" ((UDItype) (n0)),                                      \
  470              "1" ((UDItype) (n1)),                                      \
  471              "rm" ((UDItype) (dv)))
      /* Bit counts come from the "ll" compiler builtins.
         COUNT_LEADING_ZEROS_0 is not defined in this section.  */
  472 #define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
  473 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
  474 #define UMUL_TIME 40
  475 #define UDIV_TIME 40
  476 #endif /* x86_64 */
 477
  478 #if defined (__i960__) && W_TYPE_SIZE == 32
      /* i960 with 32-bit words.  Both macros are statement expressions built
         around a single emul producing a UDItype result.
         umul_ppmm reads the result back through a union whose first USItype
         member __l supplies w0 (low word) and second member __h supplies w1
         (high word).
         __umulsidi3 yields the UDItype product as the value of the
         expression.  */
  479 #define umul_ppmm(w1, w0, u, v) \
  480   ({union {UDItype __ll;                                                \
  481            struct {USItype __l, __h;} __i;                              \
  482           } __xx;                                                       \
  483   __asm__ ("emul        %2,%1,%0"                                       \
  484            : "=d" (__xx.__ll)                                           \
  485            : "%dI" ((USItype) (u)),                                     \
  486              "dI" ((USItype) (v)));                                     \
  487   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
  488 #define __umulsidi3(u, v) \
  489   ({UDItype __w;                                                        \
  490     __asm__ ("emul      %2,%1,%0"                                       \
  491              : "=d" (__w)                                               \
  492              : "%dI" ((USItype) (u)),                                   \
  493                "dI" ((USItype) (v)));                                   \
  494     __w; })
  495 #endif /* __i960__ */
 496
  497 #if defined (__ia64) && W_TYPE_SIZE == 64
  498 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated
  499    "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
  500    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
  501    register, which takes an extra cycle.  */
      /* The low difference is computed into a temporary first and stored to
         SL last, so SL may name the same object as AL or BL.  Note that AL,
         BL, AH and BH are each expanded more than once.  */
  502 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  503   do {                                                                  \
  504     UWtype __x;                                                         \
  505     __x = (al) - (bl);                                                  \
  506     if ((al) < (bl))                                                    \
  507       (sh) = (ah) - (bh) - 1;                                           \
  508     else                                                                \
  509       (sh) = (ah) - (bh);                                               \
  510     (sl) = __x;                                                         \
  511   } while (0)
  512
  513 /* Do both product parts in assembly, since that gives better code with
  514    all gcc versions.  Some callers will just use the upper part, and in
  515    that situation we waste an instruction, but not any cycles.  */
      /* xma.hu produces the high word (ph) and xma.l the low word (pl); all
         operands use the "f" constraint.  ph is an earlyclobber output
         because it is written before the second instruction reads the
         inputs.  */
  516 #define umul_ppmm(ph, pl, m0, m1)                                       \
  517   __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"            \
  518            : "=&f" (ph), "=f" (pl)                                      \
  519            : "f" (m0), "f" (m1))
      /* count_leading_zeros: mux1 @rev and czx1.l (applied to -_y | _y)
         yield a byte position _a; x is shifted right by (_a - 1) * 8 bits,
         then narrowed by 4 and by 2 bits where its value is still large
         enough, and the final bit is added in.  _c accumulates the total
         shift and the result is W_TYPE_SIZE - 1 - _c.  */
  520 #define count_leading_zeros(count, x)                                   \
  521   do {                                                                  \
  522     UWtype _x = (x), _y, _a, _c;                                        \
  523     __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));              \
  524     __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));            \
  525     _c = (_a - 1) << 3;                                                 \
  526     _x >>= _c;                                                          \
  527     if (_x >= 1 << 4)                                                   \
  528       _x >>= 4, _c += 4;                                                \
  529     if (_x >= 1 << 2)                                                   \
  530       _x >>= 2, _c += 2;                                                \
  531     _c += _x >> 1;                                                      \
  532     (count) =  W_TYPE_SIZE - 1 - _c;                                    \
  533   } while (0)
  534 /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
  535    based, and we don't need a special case for x==0 here */
      /* (x - 1) & ~x has a 1 exactly in each position below the lowest set
         bit of x, so its population count is the number of trailing
         zeros.  */
  536 #define count_trailing_zeros(count, x)                                  \
  537   do {                                                                  \
  538     UWtype __ctz_x = (x);                                               \
  539     __asm__ ("popcnt %0 = %1"                                           \
  540              : "=r" (count)                                             \
  541              : "r" ((__ctz_x-1) & ~__ctz_x));                           \
  542   } while (0)
  543 #define UMUL_TIME 14
  544 #endif
 545
 546 #if defined (__M32R__) && W_TYPE_SIZE == 32
 547 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* M32R: (sh,sl) = (ah,al) + (bh,bl), two-word add with carry between the words */ \
 548   /* The cmp clears the condition bit.  */ \
 549   __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"                      \
 550            : "=r" ((USItype) (sh)),                                     \
 551              "=&r" ((USItype) (sl))                                     \
 552            : "0" ((USItype) (ah)),   /* ah starts out in sh's register */ \
 553              "r" ((USItype) (bh)),                                      \
 554              "1" ((USItype) (al)),   /* al starts out in sl's register */ \
 555              "r" ((USItype) (bl))                                       \
 556            : "cbit")
 557 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* M32R: (sh,sl) = (ah,al) - (bh,bl), two-word subtract with borrow between the words */ \
 558   /* The cmp clears the condition bit.  */ \
 559   __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"                      \
 560            : "=r" ((USItype) (sh)),                                     \
 561              "=&r" ((USItype) (sl))                                     \
 562            : "0" ((USItype) (ah)),   /* ah starts out in sh's register */ \
 563              "r" ((USItype) (bh)),                                      \
 564              "1" ((USItype) (al)),   /* al starts out in sl's register */ \
 565              "r" ((USItype) (bl))                                       \
 566            : "cbit")
 567 #endif /* __M32R__ */
 568
 569 #if defined (__mc68000__) && W_TYPE_SIZE == 32
 570 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* m68k: (sh,sl) = (ah,al) + (bh,bl); low words via add, high words via addx */ \
 571   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"                              \
 572            : "=d" ((USItype) (sh)),                                     \
 573              "=&d" ((USItype) (sl))                                     \
 574            : "%0" ((USItype) (ah)),  /* tied to sh; % marks it commutative with the next operand */ \
 575              "d" ((USItype) (bh)),                                      \
 576              "%1" ((USItype) (al)),  /* tied to sl; commutative with bl */ \
 577              "g" ((USItype) (bl)))
 578 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* m68k: (sh,sl) = (ah,al) - (bh,bl); low words via sub, high words via subx */ \
 579   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"                              \
 580            : "=d" ((USItype) (sh)),                                     \
 581              "=&d" ((USItype) (sl))                                     \
 582            : "0" ((USItype) (ah)),   /* tied to sh; no % here, subtraction is not commutative */ \
 583              "d" ((USItype) (bh)),                                      \
 584              "1" ((USItype) (al)),   /* tied to sl */                   \
 585              "g" ((USItype) (bl)))
 586
 587 /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r; the '060 is excluded by the test below.  */
 588 #if (defined (__mc68020__) && !defined (__mc68060__))
 589 #define umul_ppmm(w1, w0, u, v) /* m68020: (w1,w0) = u * v, unsigned 32x32->64 multiply into the register pair %1:%0 */ \
 590   __asm__ ("mulu%.l %3,%1:%0"                                           \
 591            : "=d" ((USItype) (w0)),                                     \
 592              "=d" ((USItype) (w1))                                      \
 593            : "%0" ((USItype) (u)),   /* u starts out in w0's register */ \
 594              "dmi" ((USItype) (v)))
 595 #define UMUL_TIME 45
 596 #define udiv_qrnnd(q, r, n1, n0, d) /* m68020: unsigned divide of the two-word value (n1,n0) by d; quotient -> q, remainder -> r */ \
 597   __asm__ ("divu%.l %4,%1:%0"                                           \
 598            : "=d" ((USItype) (q)),                                      \
 599              "=d" ((USItype) (r))                                       \
 600            : "0" ((USItype) (n0)),   /* low dividend word shares q's register */ \
 601              "1" ((USItype) (n1)),   /* high dividend word shares r's register */ \
 602              "dmi" ((USItype) (d)))
 603 #define UDIV_TIME 90
 604 #define sdiv_qrnnd(q, r, n1, n0, d) /* m68020: signed counterpart of udiv_qrnnd (divs instead of divu), same operand layout */ \
 605   __asm__ ("divs%.l %4,%1:%0"                                           \
 606            : "=d" ((USItype) (q)),                                      \
 607              "=d" ((USItype) (r))                                       \
 608            : "0" ((USItype) (n0)),   /* low dividend word shares q's register */ \
 609              "1" ((USItype) (n1)),   /* high dividend word shares r's register */ \
 610              "dmi" ((USItype) (d)))
 611
 612 #elif defined (__mcoldfire__) /* not mc68020 */
 613
 614 #define umul_ppmm(xh, xl, a, b) /* ColdFire: (xh,xl) = a * b, built from four mulu partial products in scratch registers d0-d4 */ \
 615   __asm__ ("| Inlined umul_ppmm\n"                                      \
 616            "    move%.l %2,%/d0\n"                                      \
 617            "    move%.l %3,%/d1\n"                                      \
 618            "    move%.l %/d0,%/d2\n"                                    \
 619            "    swap    %/d0\n"                                         \
 620            "    move%.l %/d1,%/d3\n"                                    \
 621            "    swap    %/d1\n"                                         \
 622            "    move%.w %/d2,%/d4\n"                                    \
 623            "    mulu    %/d3,%/d4\n" /* four partial products follow (presumably 16x16->32 each) */ \
 624            "    mulu    %/d1,%/d2\n"                                    \
 625            "    mulu    %/d0,%/d3\n"                                    \
 626            "    mulu    %/d0,%/d1\n"                                    \
 627            "    move%.l %/d4,%/d0\n"                                    \
 628            "    clr%.w  %/d0\n"      /* this variant clears the low word with clr.w */ \
 629            "    swap    %/d0\n"                                         \
 630            "    add%.l  %/d0,%/d2\n"                                    \
 631            "    add%.l  %/d3,%/d2\n"                                    \
 632            "    jcc     1f\n"        /* skip the fix-up when the add produced no carry */ \
 633            "    add%.l  %#65536,%/d1\n"                                 \
 634            "1:  swap    %/d2\n"                                         \
 635            "    moveq   %#0,%/d0\n"                                     \
 636            "    move%.w %/d2,%/d0\n"                                    \
 637            "    move%.w %/d4,%/d2\n"                                    \
 638            "    move%.l %/d2,%1\n"   /* low result word -> xl */        \
 639            "    add%.l  %/d1,%/d0\n"                                    \
 640            "    move%.l %/d0,%0"     /* high result word -> xh */       \
 641            : "=g" ((USItype) (xh)),                                     \
 642              "=g" ((USItype) (xl))                                      \
 643            : "g" ((USItype) (a)),                                       \
 644              "g" ((USItype) (b))                                        \
 645            : "d0", "d1", "d2", "d3", "d4")
 646 #define UMUL_TIME 100
 647 #define UDIV_TIME 400
 648 #else /* not ColdFire */
 649 /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
 650 #define umul_ppmm(xh, xl, a, b) /* m68k without the long multiply: (xh,xl) = a * b from four mulu partial products in scratch registers d0-d4 */ \
 651   __asm__ ("| Inlined umul_ppmm\n"                                      \
 652            "    move%.l %2,%/d0\n"                                      \
 653            "    move%.l %3,%/d1\n"                                      \
 654            "    move%.l %/d0,%/d2\n"                                    \
 655            "    swap    %/d0\n"                                         \
 656            "    move%.l %/d1,%/d3\n"                                    \
 657            "    swap    %/d1\n"                                         \
 658            "    move%.w %/d2,%/d4\n"                                    \
 659            "    mulu    %/d3,%/d4\n" /* four partial products follow (presumably 16x16->32 each) */ \
 660            "    mulu    %/d1,%/d2\n"                                    \
 661            "    mulu    %/d0,%/d3\n"                                    \
 662            "    mulu    %/d0,%/d1\n"                                    \
 663            "    move%.l %/d4,%/d0\n"                                    \
 664            "    eor%.w  %/d0,%/d0\n" /* this variant zeroes the low word with eor.w where the ColdFire one uses clr.w */ \
 665            "    swap    %/d0\n"                                         \
 666            "    add%.l  %/d0,%/d2\n"                                    \
 667            "    add%.l  %/d3,%/d2\n"                                    \
 668            "    jcc     1f\n"        /* skip the fix-up when the add produced no carry */ \
 669            "    add%.l  %#65536,%/d1\n"                                 \
 670            "1:  swap    %/d2\n"                                         \
 671            "    moveq   %#0,%/d0\n"                                     \
 672            "    move%.w %/d2,%/d0\n"                                    \
 673            "    move%.w %/d4,%/d2\n"                                    \
 674            "    move%.l %/d2,%1\n"   /* low result word -> xl */        \
 675            "    add%.l  %/d1,%/d0\n"                                    \
 676            "    move%.l %/d0,%0"     /* high result word -> xh */       \
 677            : "=g" ((USItype) (xh)),                                     \
 678              "=g" ((USItype) (xl))                                      \
 679            : "g" ((USItype) (a)),                                       \
 680              "g" ((USItype) (b))                                        \
 681            : "d0", "d1", "d2", "d3", "d4")
 682 #define UMUL_TIME 100
 683 #define UDIV_TIME 400
 684
 685 #endif /* not mc68020 */
 686
 687 /* The '020, '030, '040 and '060 have bitfield insns.
 688    cpu32 disguises as a 68020, but lacks them.  */
 689 #if defined (__mc68020__) && !defined (__mcpu32__)
 690 #define count_leading_zeros(count, x) /* m68020+: count = leading zero bits of x, via the bfffo bit-field instruction */ \
 691   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
 692            : "=d" ((USItype) (count))                                   \
 693            : "od" ((USItype) (x)), "n" (0)) /* operand 2 (constant 0) is used as both bit-field offset and width */
 694 /* Some ColdFire architectures have a ff1 instruction supported via
 695    __builtin_clz.  Here the count is taken straight from the builtin.  */
 696 #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
 697 #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
 698 #define COUNT_LEADING_ZEROS_0 32
 699 #endif
 700 #endif /* mc68000 */
 701
 702 #if defined (__m88000__) && W_TYPE_SIZE == 32
 703 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* m88k: (sh,sl) = (ah,al) + (bh,bl); addu.co on the low words, addu.ci on the high words */ \
 704   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"                   \
 705            : "=r" ((USItype) (sh)),                                     \
 706              "=&r" ((USItype) (sl))                                     \
 707            : "%rJ" ((USItype) (ah)), /* % marks ah/bh as interchangeable */ \
 708              "rJ" ((USItype) (bh)),                                     \
 709              "%rJ" ((USItype) (al)), /* % marks al/bl as interchangeable */ \
 710              "rJ" ((USItype) (bl)))
 711 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* m88k: (sh,sl) = (ah,al) - (bh,bl); subu.co on the low words, subu.ci on the high words */ \
 712   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"                   \
 713            : "=r" ((USItype) (sh)),                                     \
 714              "=&r" ((USItype) (sl))                                     \
 715            : "rJ" ((USItype) (ah)),  /* no % here: operand order matters for subtraction */ \
 716              "rJ" ((USItype) (bh)),                                     \
 717              "rJ" ((USItype) (al)),                                     \
 718              "rJ" ((USItype) (bl)))
 719 #define count_leading_zeros(count, x) /* m88k: count = leading zero bits of x, derived from the ff1 result */ \
 720   do {                                                                  \
 721     USItype __cbtmp;                                                    \
 722     __asm__ ("ff1 %0,%1"                                                \
 723              : "=r" (__cbtmp)                                           \
 724              : "r" ((USItype) (x)));                                    \
 725     (count) = __cbtmp ^ 31;       /* for values 0..31 this equals 31 - __cbtmp */ \
 726   } while (0)
 727 #define COUNT_LEADING_ZEROS_0 63 /* sic; presumably ff1 yields 32 for a zero input and 32 ^ 31 == 63 -- confirm */
 728 #if defined (__mc88110__)
 729 #define umul_ppmm(wh, wl, u, v) /* mc88110: (wh,wl) = u * v, via a double-word mulu.d result split through a union */ \
 730   do {                                                                  \
 731     union {UDItype __ll;                                                \
 732            struct {USItype __h, __l;} __i; /* high word is declared first */ \
 733           } __xx;                                                       \
 734     __asm__ ("mulu.d    %0,%1,%2"                                       \
 735              : "=r" (__xx.__ll)                                         \
 736              : "r" ((USItype) (u)),                                     \
 737                "r" ((USItype) (v)));                                    \
 738     (wh) = __xx.__i.__h;                                                \
 739     (wl) = __xx.__i.__l;                                                \
 740   } while (0)
 741 #define udiv_qrnnd(q, r, n1, n0, d) /* mc88110: divide (n1,n0) by d with divu.d; q = quotient, r = n0 - q * d */ \
 742   ({union {UDItype __ll;                                                \
 743            struct {USItype __h, __l;} __i;                              \
 744           } __xx;                                                       \
 745   USItype __q;                                                          \
 746   __xx.__i.__h = (n1); __xx.__i.__l = (n0); /* assemble the double-word dividend */ \
 747   __asm__ ("divu.d %0,%1,%2"                                            \
 748            : "=r" (__q)                                                 \
 749            : "r" (__xx.__ll),                                           \
 750              "r" ((USItype) (d)));                                      \
 751   (r) = (n0) - __q * (d); (q) = __q; }) /* NOTE: n0 and d are each expanded twice by this macro */
 752 #define UMUL_TIME 5
 753 #define UDIV_TIME 25
 754 #else
 755 #define UMUL_TIME 17
 756 #define UDIV_TIME 150
 757 #endif /* __mc88110__ */
 758 #endif /* __m88000__ */
 759
 760 #if defined (__mn10300__)
 761 # if defined (__AM33__)
 762 #  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X)) /* AM33: leading-zero count taken straight from the builtin */
 763 #  define umul_ppmm(w1, w0, u, v) /* AM33: (w1,w0) = u * v, unsigned; spelled __asm__ like the rest of this file so it also parses when the plain asm keyword is disabled */ \
 764     __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 765 #  define smul_ppmm(w1, w0, u, v) /* AM33: (w1,w0) = u * v, signed; spelled __asm__ like the rest of this file so it also parses when the plain asm keyword is disabled */ \
 766     __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
 767 # else
 768 #  define umul_ppmm(w1, w0, u, v) /* mn10300: unsigned multiply; low word in w0 (shares u's register), high word via the "z" constraint (presumably MDR -- confirm); __asm__ for strict ISO modes */ \
 769     __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 770 #  define smul_ppmm(w1, w0, u, v) /* mn10300: signed multiply; low word in w0 (shares u's register), high word via the "z" constraint (presumably MDR -- confirm); __asm__ for strict ISO modes */ \
 771     __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
 772 # endif
 773 # define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl), done as one double-word C addition */ \
 774   do {                                          \
 775     DWunion __sum, __lhs, __rhs;                \
 776     __lhs.s.low = (al); __lhs.s.high = (ah);    /* pack the first operand */ \
 777     __rhs.s.low = (bl); __rhs.s.high = (bh);    /* pack the second operand */ \
 778     __sum.ll = __lhs.ll + __rhs.ll;             \
 779     (sl) = __sum.s.low; (sh) = __sum.s.high;    /* unpack, low word first */ \
 780   } while (0)
 781 # define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl), done as one double-word C subtraction */ \
 782   do {                                          \
 783     DWunion __diff, __lhs, __rhs;               \
 784     __lhs.s.low = (al); __lhs.s.high = (ah);    /* pack the minuend */ \
 785     __rhs.s.low = (bl); __rhs.s.high = (bh);    /* pack the subtrahend */ \
 786     __diff.ll = __lhs.ll - __rhs.ll;            \
 787     (sl) = __diff.s.low; (sh) = __diff.s.high;  /* unpack, low word first */ \
 788   } while (0)
 789 # define udiv_qrnnd(q, r, nh, nl, d) /* mn10300: unsigned divide of (nh,nl) by d; q shares nl's register, r shares nh's ("z") register; __asm__ for strict ISO modes */ \
 790   __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 791 # define sdiv_qrnnd(q, r, nh, nl, d) /* mn10300: signed divide of (nh,nl) by d; q shares nl's register, r shares nh's ("z") register; __asm__ for strict ISO modes */ \
 792   __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
 793 # define UMUL_TIME 3
 794 # define UDIV_TIME 38
 795 #endif
 796
 797 #if defined (__mips__) && W_TYPE_SIZE == 32
 798 #define umul_ppmm(w1, w0, u, v) /* MIPS32: (w1,w0) = u * v, left to the compiler as a widening C multiply */ \
 799   do {                                                                  \
 800     const UDItype __prod = (UDItype) (USItype) (u) * (USItype) (v);     \
 801     (w1) = (USItype) (__prod >> 32);         /* upper 32 bits */        \
 802     (w0) = (USItype) (__prod & 0xffffffffU); /* lower 32 bits */        \
 803   } while (0)
 804 #define UMUL_TIME 10
 805 #define UDIV_TIME 100
 806
 807 #if (__mips == 32 || __mips == 64) && ! __mips16
 808 #define count_leading_zeros(COUNT,X)    ((COUNT) = __builtin_clz (X)) /* only defined for __mips == 32 or 64 without __mips16, per the enclosing #if */
 809 #define COUNT_LEADING_ZEROS_0 32
 810 #endif
 811 #endif /* __mips__ */
 812
 813 #if defined (__ns32000__) && W_TYPE_SIZE == 32
 814 #define umul_ppmm(w1, w0, u, v) /* ns32000: (w1,w0) = u * v; meid leaves a double-word result that is split through a union */ \
 815   ({union {UDItype __ll;                                                \
 816            struct {USItype __l, __h;} __i; /* low word is declared first */ \
 817           } __xx;                                                       \
 818   __asm__ ("meid %2,%0"                                                 \
 819            : "=g" (__xx.__ll)                                           \
 820            : "%0" ((USItype) (u)),   /* u starts out in the result location */ \
 821              "g" ((USItype) (v)));                                      \
 822   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
 823 #define __umulsidi3(u, v) /* ns32000: same meid multiply as umul_ppmm, but the statement expression yields the whole UDItype product */ \
 824   ({UDItype __w;                                                        \
 825     __asm__ ("meid %2,%0"                                               \
 826              : "=g" (__w)                                               \
 827              : "%0" ((USItype) (u)),                                    \
 828                "g" ((USItype) (v)));                                    \
 829     __w; })
 830 #define udiv_qrnnd(q, r, n1, n0, d) /* ns32000: divide (n1,n0) by d in place with deid; afterwards the low word is read as r and the high word as q */ \
 831   ({union {UDItype __ll;                                                \
 832            struct {USItype __l, __h;} __i;                              \
 833           } __xx;                                                       \
 834   __xx.__i.__h = (n1); __xx.__i.__l = (n0); /* assemble the double-word dividend */ \
 835   __asm__ ("deid %2,%0"                                                 \
 836            : "=g" (__xx.__ll)                                           \
 837            : "0" (__xx.__ll),        /* dividend is overwritten by the result */ \
 838              "g" ((USItype) (d)));                                      \
 839   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
 840 #define count_trailing_zeros(count,x) /* ns32000: count = trailing zero bits of x, via ffsd */ \
 841   do {                                                                  \
 842     __asm__ ("ffsd     %2,%0"                                           \
 843             : "=r" ((USItype) (count))                                  \
 844             : "0" ((USItype) 0),     /* count's register is preloaded with 0 (presumably the starting bit offset) */ \
 845               "r" ((USItype) (x)));                                     \
 846   } while (0)
 847 #endif /* __ns32000__ */
 848
 849 /* FIXME: We should test _IBMR2 here when we add assembly support for the
 850    system vendor compilers.
 851    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
 852    enough, since that hits ARM and m68k too.  */
 853 #if (defined (_ARCH_PPC)        /* AIX */                               \
 854      || defined (_ARCH_PWR)     /* AIX */                               \
 855      || defined (_ARCH_COM)     /* AIX */                               \
 856      || defined (__powerpc__)   /* gcc */                               \
 857      || defined (__POWERPC__)   /* BEOS */                              \
 858      || defined (__ppc__)       /* Darwin */                            \
 859      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
 860      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
 861          && CPU_FAMILY == PPC)                                                \
 862      ) && W_TYPE_SIZE == 32
 863 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* 32-bit POWER/PowerPC: (sh,sl) = (ah,al) + (bh,bl); shorter sequences when bh is a known constant */ \
 864   do {                                                                  \
 865     if (__builtin_constant_p (bh) && (bh) == 0)   /* bh == 0: add only the carry to ah */ \
 866       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 867              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 868     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) /* bh == all ones: add carry minus one to ah */ \
 869       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 870              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 871     else                          /* general case: full add with carry */ \
 872       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 873              : "=r" (sh), "=&r" (sl)                                    \
 874              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 875   } while (0)
 876 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* 32-bit POWER/PowerPC: (sh,sl) = (ah,al) - (bh,bl); shorter sequences when ah or bh is a known constant 0 or all ones */ \
 877   do {                                                                  \
 878     if (__builtin_constant_p (ah) && (ah) == 0)   /* ah == 0 */         \
 879       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 880                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 881     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) /* ah == all ones */ \
 882       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 883                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 884     else if (__builtin_constant_p (bh) && (bh) == 0) /* bh == 0 */      \
 885       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 886                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 887     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) /* bh == all ones */ \
 888       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 889                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 890     else                          /* general case: subtract-from with carry */ \
 891       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 892                : "=r" (sh), "=&r" (sl)                                  \
 893                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 894   } while (0)
 895 #define count_leading_zeros(count, x) /* 32-bit POWER/PowerPC: count = leading zero bits of x; the braces give two alternative mnemonic spellings */ \
 896   __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
 897 #define COUNT_LEADING_ZEROS_0 32
 898 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
 899   || defined (__ppc__)                                                    \
 900   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
 901   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
 902          && CPU_FAMILY == PPC)
 903 #define umul_ppmm(ph, pl, m0, m1) /* 32-bit PowerPC: (ph,pl) = m0 * m1, unsigned; mulhwu gives the high word, a C multiply the low word */ \
 904   do {                                                                  \
 905     USItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to USItype */ \
 906     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* same temporaries as the low word, so both halves agree */ \
 907     (pl) = __m0 * __m1;                                                 \
 908   } while (0)
 909 #define UMUL_TIME 15
 910 #define smul_ppmm(ph, pl, m0, m1) /* 32-bit PowerPC: (ph,pl) = m0 * m1, signed; mulhw gives the high word, a C multiply the low word */ \
 911   do {                                                                  \
 912     SItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to SItype */ \
 913     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* same temporaries as the low word, so both halves agree */ \
 914     (pl) = __m0 * __m1;                                                 \
 915   } while (0)
 916 #define SMUL_TIME 14
 917 #define UDIV_TIME 120
 918 #elif defined (_ARCH_PWR)
 919 #define UMUL_TIME 8
 920 #define smul_ppmm(xh, xl, m0, m1) /* POWER: signed multiply of m0 by m1; xh in a general register, xl via the "q" constraint (presumably the MQ register -- confirm) */ \
 921   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 922 #define SMUL_TIME 4
 923 #define sdiv_qrnnd(q, r, nh, nl, d) /* POWER: signed divide of (nh,nl) by d; nl is preloaded where r is returned (the "q" operand) */ \
 924   __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 925 #define UDIV_TIME 100
 926 #endif
 927 #endif /* 32-bit POWER architecture variants.  */
 928
 929 /* We should test _IBMR2 here when we add assembly support for the system
 930    vendor compilers.  */
 931 #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
 932 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* 64-bit PowerPC: (sh,sl) = (ah,al) + (bh,bl); shorter sequences when bh is a known constant */ \
 933   do {                                                                  \
 934     if (__builtin_constant_p (bh) && (bh) == 0)   /* bh == 0: add only the carry to ah */ \
 935       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
 936              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 937     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) /* bh == all ones: add carry minus one to ah */ \
 938       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
 939              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
 940     else                          /* general case: full add with carry */ \
 941       __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
 942              : "=r" (sh), "=&r" (sl)                                    \
 943              : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));              \
 944   } while (0)
 945 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* 64-bit PowerPC: (sh,sl) = (ah,al) - (bh,bl); shorter sequences when ah or bh is a known constant 0 or all ones */ \
 946   do {                                                                  \
 947     if (__builtin_constant_p (ah) && (ah) == 0)   /* ah == 0 */         \
 948       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
 949                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 950     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) /* ah == all ones */ \
 951       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
 952                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
 953     else if (__builtin_constant_p (bh) && (bh) == 0) /* bh == 0 */      \
 954       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
 955                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 956     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) /* bh == all ones */ \
 957       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
 958                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
 959     else                          /* general case: subtract-from with carry */ \
 960       __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
 961                : "=r" (sh), "=&r" (sl)                                  \
 962                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
 963   } while (0)
 964 #define count_leading_zeros(count, x) /* 64-bit PowerPC: count = leading zero bits of x, via cntlzd */ \
 965   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 966 #define COUNT_LEADING_ZEROS_0 64
 967 #define umul_ppmm(ph, pl, m0, m1) /* 64-bit PowerPC: (ph,pl) = m0 * m1, unsigned; mulhdu gives the high word, a C multiply the low word */ \
 968   do {                                                                  \
 969     UDItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to UDItype */ \
 970     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* same temporaries as the low word, so both halves agree */ \
 971     (pl) = __m0 * __m1;                                                 \
 972   } while (0)
 973 #define UMUL_TIME 15
 974 #define smul_ppmm(ph, pl, m0, m1) /* 64-bit PowerPC: (ph,pl) = m0 * m1, signed; mulhd gives the high word, a C multiply the low word */ \
 975   do {                                                                  \
 976     DItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to DItype */ \
 977     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* same temporaries as the low word, so both halves agree */ \
 978     (pl) = __m0 * __m1;                                                 \
 979   } while (0)
 980 #define SMUL_TIME 14  /* ??? */
 981 #define UDIV_TIME 120 /* ??? */
 982 #endif /* 64-bit PowerPC.  */
 983
 984 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
 985 #define add_ssaaaa(sh, sl, ah, al, bh, bl) /* RT/ROMP: (sh,sl) = (ah,al) + (bh,bl); "a" on the low words, then "ae" (presumably add-extended, i.e. with carry) on the high words */ \
 986   __asm__ ("a %1,%5\n\tae %0,%3"                                        \
 987            : "=r" ((USItype) (sh)),                                     \
 988              "=&r" ((USItype) (sl))                                     \
 989            : "%0" ((USItype) (ah)),  /* tied to sh; commutative with bh */ \
 990              "r" ((USItype) (bh)),                                      \
 991              "%1" ((USItype) (al)),  /* tied to sl; commutative with bl */ \
 992              "r" ((USItype) (bl)))
 993 #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* RT/ROMP: (sh,sl) = (ah,al) - (bh,bl); "s" on the low words, then "se" (presumably subtract-extended, i.e. with borrow) on the high words */ \
 994   __asm__ ("s %1,%5\n\tse %0,%3"                                        \
 995            : "=r" ((USItype) (sh)),                                     \
 996              "=&r" ((USItype) (sl))                                     \
 997            : "0" ((USItype) (ah)),   /* tied to sh */                   \
 998              "r" ((USItype) (bh)),                                      \
 999              "1" ((USItype) (al)),   /* tied to sl */                   \
1000              "r" ((USItype) (bl)))
1001 #define umul_ppmm(ph, pl, m0, m1) /* RT/ROMP: (ph,pl) = m0 * m1, unsigned; sixteen "m" steps (presumably multiply steps) followed by a C correction of the high word */ \
1002   do {                                                                  \
1003     USItype __m0 = (m0), __m1 = (m1); /* each argument is evaluated once */ \
1004     __asm__ (                                                           \
1005        "s       r2,r2\n"         /* r2 = r2 - r2, i.e. zero */         \
1006 "       mts     r10,%2\n"                                               \
1007 "       m       r2,%3\n"                                                \
1008 "       m       r2,%3\n"                                                \
1009 "       m       r2,%3\n"                                                \
1010 "       m       r2,%3\n"                                                \
1011 "       m       r2,%3\n"                                                \
1012 "       m       r2,%3\n"                                                \
1013 "       m       r2,%3\n"                                                \
1014 "       m       r2,%3\n"                                                \
1015 "       m       r2,%3\n"                                                \
1016 "       m       r2,%3\n"                                                \
1017 "       m       r2,%3\n"                                                \
1018 "       m       r2,%3\n"                                                \
1019 "       m       r2,%3\n"                                                \
1020 "       m       r2,%3\n"                                                \
1021 "       m       r2,%3\n"                                                \
1022 "       m       r2,%3\n"                                                \
1023 "       cas     %0,r2,r0\n"     /* high word is taken from r2 */        \
1024 "       mfs     r10,%1"         /* low word is read back from r10 */    \
1025              : "=r" ((USItype) (ph)),                                   \
1026                "=r" ((USItype) (pl))                                    \
1027              : "%r" (__m0),                                             \
1028                 "r" (__m1)                                              \
1029              : "r2");                                                   \
1030     (ph) += ((((SItype) __m0 >> 31) & __m1) /* add m1 when m0 has its top bit set ... */ \
1031              + (((SItype) __m1 >> 31) & __m0)); /* ... and m0 when m1 does; presumably turns a signed high word into the unsigned one */ \
1032   } while (0)
1033 #define UMUL_TIME 20
1034 #define UDIV_TIME 200
1035 #define count_leading_zeros(count, x) /* RT/ROMP: count = leading zero bits of x; clz is applied to one 16-bit half at a time */ \
1036   do {                                                                  \
1037     USItype __clz_x = (x);  /* evaluate x once; unsigned so a set top bit selects the upper-half branch */ \
1038     if (__clz_x >= 0x10000)                                             \
1039       __asm__ ("clz     %0,%1"                                          \
1040                : "=r" ((USItype) (count)) : "r" (__clz_x >> 16));       \
1041     else                                                                \
1042       {                                                                 \
1043         __asm__ ("clz   %0,%1"                                          \
1044                  : "=r" ((USItype) (count))                             \
1045                  : "r" (__clz_x));                                      \
1046         (count) += 16;          /* the upper 16 bits were all zero */   \
1047       }                                                                 \
1048   } while (0)
1049 #endif
1050
1051 #if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1052 #ifndef __sh1__
1053 #define umul_ppmm(w1, w0, u, v) /* SH (not SH1): (w1,w0) = u * v, unsigned; dmulu.l leaves the product in mach:macl, then sts copies each half out */ \
1054   __asm__ (                                                             \
1055        "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0       mach,%0"        \
1056            : "=r<" ((USItype)(w1)),  /* high word, read from mach */    \
1057              "=r<" ((USItype)(w0))   /* low word, read from macl */     \
1058            : "r" ((USItype)(u)),                                        \
1059              "r" ((USItype)(v))                                         \
1060            : "macl", "mach")
1061 #define UMUL_TIME 5
1062 #endif
1063
1064 /* This is the same algorithm as __udiv_qrnnd_c.  */
1065 #define UDIV_NEEDS_NORMALIZATION 1
1066
/* Divide the two-word value (N1,N0) by D, leaving the quotient in Q and
   the remainder in R.  The asm calls the out-of-line helper
   __udiv_qrnnd_16 twice through jsr and merges the two results it leaves
   in r1 into Q with swap.w / or.  R is tied to N1 by the "1" matching
   constraint.  Registers are used according to the map in the comment
   inside the macro; r1, r2, r4, r5, r6, pr and t are clobbered.
   NOTE(review): the helper is not visible here; presumably each call
   yields one 16-bit half of the quotient -- confirm against its
   definition.  */
1067 #define udiv_qrnnd(q, r, n1, n0, d) \
1068   do {                                                                  \
1069     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)                      \
1070                         __attribute__ ((visibility ("hidden")));        \
1071     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */  \
1072     __asm__ (                                                           \
1073         "mov%M4 %4,r5\n"                                                \
1074 "       swap.w %3,r4\n"                                                 \
1075 "       swap.w r5,r6\n"                                                 \
1076 "       jsr @%5\n"                                                      \
1077 "       shll16 r6\n"                                                    \
1078 "       swap.w r4,r4\n"                                                 \
1079 "       jsr @%5\n"                                                      \
1080 "       swap.w r1,%0\n"                                                 \
1081 "       or r1,%0"                                                       \
1082         : "=r" (q), "=&z" (r)                                           \
1083         : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)          \
1084         : "r1", "r2", "r4", "r5", "r6", "pr", "t");                     \
1085   } while (0)
1086
/* Timing hint for the division above.  */
1087 #define UDIV_TIME 80
1088
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for SH.  clrt clears
   the T bit, then subc subtracts BL from the low word and BH from the
   high word (subc presumably carries the borrow through T -- confirm
   against the SH manual).  SH and SL start out holding AH and AL via
   the "0"/"1" matching constraints; T is clobbered.  */
1089 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1090   __asm__ ("clrt;subc %5,%1; subc %4,%0"                                \
1091            : "=r" (sh), "=r" (sl)                                       \
1092            : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1093
1094 #endif /* __sh__ */
1095
1096 #if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening unsigned multiply in plain C: yields the full 64-bit
   (UDItype) product of U and V, each first converted to the 32-bit
   USItype.  Both arguments are parenthesized so that an expression
   argument such as `a + b' is converted and multiplied as a whole
   rather than being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* Set COUNT to the leading-zero count of the 32-bit value X.  X is
   zero-extended into a 64-bit temporary, the nsb instruction is run on
   it, and 31 is subtracted from the instruction's result.
   NOTE(review): the -31 adjustment presumably converts nsb's result on
   the 64-bit operand into a 32-bit leading-zero count -- confirm
   against the SHmedia instruction reference.  */
1098 #define count_leading_zeros(count, x) \
1099   do                                                                    \
1100     {                                                                   \
1101       UDItype x_ = (USItype)(x);                                        \
1102       SItype c_;                                                        \
1103                                                                         \
1104       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));                    \
1105       (count) = c_ - 31;                                                \
1106     }                                                                   \
1107   while (0)
/* Value count_leading_zeros is declared to produce for X == 0.  */
1108 #define COUNT_LEADING_ZEROS_0 32
1109 #endif
1110
1111 #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1112     && W_TYPE_SIZE == 32
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for 32-bit SPARC.
   addcc adds the low words into SL and sets the condition codes; addx
   then adds the high words into SH together with the carry.  SL is
   early-clobber ("=&r") because it is written before the high-word
   inputs are read.  The "%" on AH and AL marks those operands as
   commutative with the following one.  Condition codes are clobbered
   through __CLOBBER_CC.  */
1113 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1114   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"                          \
1115            : "=r" ((USItype) (sh)),                                     \
1116              "=&r" ((USItype) (sl))                                     \
1117            : "%rJ" ((USItype) (ah)),                                    \
1118              "rI" ((USItype) (bh)),                                     \
1119              "%rJ" ((USItype) (al)),                                    \
1120              "rI" ((USItype) (bl))                                      \
1121            __CLOBBER_CC)
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for 32-bit SPARC.
   subcc subtracts the low words into SL and sets the condition codes;
   subx then subtracts the high words into SH together with the borrow.
   SL is early-clobber because it is written before the high-word inputs
   are read.  Unlike the addition macro, no operand is marked
   commutative.  Condition codes are clobbered through __CLOBBER_CC.  */
1122 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1123   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"                          \
1124            : "=r" ((USItype) (sh)),                                     \
1125              "=&r" ((USItype) (sl))                                     \
1126            : "rJ" ((USItype) (ah)),                                     \
1127              "rI" ((USItype) (bh)),                                     \
1128              "rJ" ((USItype) (al)),                                     \
1129              "rI" ((USItype) (bl))                                      \
1130            __CLOBBER_CC)
1131 #if defined (__sparc_v8__)
/* Unsigned widening multiply for SPARC v8: umul multiplies U by V and
   puts the low word in W0; the high word is then read from the %y
   register into W1.  */
1132 #define umul_ppmm(w1, w0, u, v) \
1133   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1134            : "=r" ((USItype) (w1)),                                     \
1135              "=r" ((USItype) (w0))                                      \
1136            : "r" ((USItype) (u)),                                       \
1137              "r" ((USItype) (v)))
/* Two-word by one-word unsigned division for SPARC v8.  __N1 is moved
   into %y (followed by three nops), udiv divides using __N0 and __D to
   give __Q, and the remainder is rebuilt as __N0 - __Q * __D with umul
   and sub.  Both outputs are early-clobber since inputs are still read
   after they are written.  */
1138 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1139   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1140            : "=&r" ((USItype) (__q)),                                   \
1141              "=&r" ((USItype) (__r))                                    \
1142            : "r" ((USItype) (__n1)),                                    \
1143              "r" ((USItype) (__n0)),                                    \
1144              "r" ((USItype) (__d)))
1145 #else
1146 #if defined (__sparclite__)
1147 /* This has hardware multiply but not divide.  It also has two additional
1148    instructions scan (ffs from high bit) and divscc.  */
/* Unsigned widening multiply for SPARClite: umul multiplies U by V and
   puts the low word in W0; the high word is read from %y into W1.  */
1149 #define umul_ppmm(w1, w0, u, v) \
1150   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
1151            : "=r" ((USItype) (w1)),                                     \
1152              "=r" ((USItype) (w0))                                      \
1153            : "r" ((USItype) (u)),                                       \
1154              "r" ((USItype) (v)))
/* Two-word by one-word unsigned division for SPARClite, fully unrolled.
   N1 is written to %y, tst %g0 sets the condition codes, and then 32
   divscc steps are executed: the first takes N0, the following ones
   feed %g1 back into itself, and the last writes Q.  The remainder is
   read from %y into R; when the `bl' condition holds, D is added to R
   in the annulled delay slot.  %g1 and the condition codes are
   clobbered.
   NOTE(review): each divscc presumably produces one quotient bit --
   confirm against the SPARClite manual.  */
1155 #define udiv_qrnnd(q, r, n1, n0, d) \
1156   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1157 "       wr      %%g0,%2,%%y     ! Not a delayed write for sparclite\n"  \
1158 "       tst     %%g0\n"                                                 \
1159 "       divscc  %3,%4,%%g1\n"                                           \
1160 "       divscc  %%g1,%4,%%g1\n"                                         \
1161 "       divscc  %%g1,%4,%%g1\n"                                         \
1162 "       divscc  %%g1,%4,%%g1\n"                                         \
1163 "       divscc  %%g1,%4,%%g1\n"                                         \
1164 "       divscc  %%g1,%4,%%g1\n"                                         \
1165 "       divscc  %%g1,%4,%%g1\n"                                         \
1166 "       divscc  %%g1,%4,%%g1\n"                                         \
1167 "       divscc  %%g1,%4,%%g1\n"                                         \
1168 "       divscc  %%g1,%4,%%g1\n"                                         \
1169 "       divscc  %%g1,%4,%%g1\n"                                         \
1170 "       divscc  %%g1,%4,%%g1\n"                                         \
1171 "       divscc  %%g1,%4,%%g1\n"                                         \
1172 "       divscc  %%g1,%4,%%g1\n"                                         \
1173 "       divscc  %%g1,%4,%%g1\n"                                         \
1174 "       divscc  %%g1,%4,%%g1\n"                                         \
1175 "       divscc  %%g1,%4,%%g1\n"                                         \
1176 "       divscc  %%g1,%4,%%g1\n"                                         \
1177 "       divscc  %%g1,%4,%%g1\n"                                         \
1178 "       divscc  %%g1,%4,%%g1\n"                                         \
1179 "       divscc  %%g1,%4,%%g1\n"                                         \
1180 "       divscc  %%g1,%4,%%g1\n"                                         \
1181 "       divscc  %%g1,%4,%%g1\n"                                         \
1182 "       divscc  %%g1,%4,%%g1\n"                                         \
1183 "       divscc  %%g1,%4,%%g1\n"                                         \
1184 "       divscc  %%g1,%4,%%g1\n"                                         \
1185 "       divscc  %%g1,%4,%%g1\n"                                         \
1186 "       divscc  %%g1,%4,%%g1\n"                                         \
1187 "       divscc  %%g1,%4,%%g1\n"                                         \
1188 "       divscc  %%g1,%4,%%g1\n"                                         \
1189 "       divscc  %%g1,%4,%%g1\n"                                         \
1190 "       divscc  %%g1,%4,%0\n"                                           \
1191 "       rd      %%y,%1\n"                                               \
1192 "       bl,a 1f\n"                                                      \
1193 "       add     %1,%4,%1\n"                                             \
1194 "1:     ! End of inline udiv_qrnnd"                                     \
1195            : "=r" ((USItype) (q)),                                      \
1196              "=r" ((USItype) (r))                                       \
1197            : "r" ((USItype) (n1)),                                      \
1198              "r" ((USItype) (n0)),                                      \
1199              "rI" ((USItype) (d))                                       \
1200            : "g1" __AND_CLOBBER_CC)
/* Timing hint for the unrolled division above.  */
1201 #define UDIV_TIME 37
/* Set COUNT from the SPARClite scan instruction applied to X with a
   second operand of 1 (the file describes scan as "ffs from high
   bit").  The result for X == 0 is deliberately left unspecified; see
   the comment that follows.  */
1202 #define count_leading_zeros(count, x) \
1203   do {                                                                  \
1204   __asm__ ("scan %1,1,%0"                                               \
1205            : "=r" ((USItype) (count))                                   \
1206            : "r" ((USItype) (x)));                                      \
1207   } while (0)
1208 /* Early sparclites return 63 for an argument of 0, but they warn that future
1209    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1210    undefined.  */
1211 #else
1212 /* SPARC without integer multiplication and divide instructions.
1213    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* Unsigned widening multiply built from multiply-step instructions.
   U is written to %y and %g1 is cleared by andcc; 32 mulscc steps with
   V accumulate into %g1, followed by one final mulscc with 0.  %o5 is
   set to U when the top bit of V is set (sra by 31, then and) and to 0
   otherwise; it is added to %g1 to form the high word W1.  The low
   word W0 is read from %y.  %g1, %o5 and the condition codes are
   clobbered.  The instruction order at the start is significant, as the
   "Don't move this insn" remarks in the asm text say.
   NOTE(review): the %o5 term presumably corrects for mulscc treating
   one operand as signed -- confirm against the SPARC manual.  */
1214 #define umul_ppmm(w1, w0, u, v) \
1215   __asm__ ("! Inlined umul_ppmm\n"                                      \
1216 "       wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n"\
1217 "       sra     %3,31,%%o5      ! Don't move this insn\n"               \
1218 "       and     %2,%%o5,%%o5    ! Don't move this insn\n"               \
1219 "       andcc   %%g0,0,%%g1     ! Don't move this insn\n"               \
1220 "       mulscc  %%g1,%3,%%g1\n"                                         \
1221 "       mulscc  %%g1,%3,%%g1\n"                                         \
1222 "       mulscc  %%g1,%3,%%g1\n"                                         \
1223 "       mulscc  %%g1,%3,%%g1\n"                                         \
1224 "       mulscc  %%g1,%3,%%g1\n"                                         \
1225 "       mulscc  %%g1,%3,%%g1\n"                                         \
1226 "       mulscc  %%g1,%3,%%g1\n"                                         \
1227 "       mulscc  %%g1,%3,%%g1\n"                                         \
1228 "       mulscc  %%g1,%3,%%g1\n"                                         \
1229 "       mulscc  %%g1,%3,%%g1\n"                                         \
1230 "       mulscc  %%g1,%3,%%g1\n"                                         \
1231 "       mulscc  %%g1,%3,%%g1\n"                                         \
1232 "       mulscc  %%g1,%3,%%g1\n"                                         \
1233 "       mulscc  %%g1,%3,%%g1\n"                                         \
1234 "       mulscc  %%g1,%3,%%g1\n"                                         \
1235 "       mulscc  %%g1,%3,%%g1\n"                                         \
1236 "       mulscc  %%g1,%3,%%g1\n"                                         \
1237 "       mulscc  %%g1,%3,%%g1\n"                                         \
1238 "       mulscc  %%g1,%3,%%g1\n"                                         \
1239 "       mulscc  %%g1,%3,%%g1\n"                                         \
1240 "       mulscc  %%g1,%3,%%g1\n"                                         \
1241 "       mulscc  %%g1,%3,%%g1\n"                                         \
1242 "       mulscc  %%g1,%3,%%g1\n"                                         \
1243 "       mulscc  %%g1,%3,%%g1\n"                                         \
1244 "       mulscc  %%g1,%3,%%g1\n"                                         \
1245 "       mulscc  %%g1,%3,%%g1\n"                                         \
1246 "       mulscc  %%g1,%3,%%g1\n"                                         \
1247 "       mulscc  %%g1,%3,%%g1\n"                                         \
1248 "       mulscc  %%g1,%3,%%g1\n"                                         \
1249 "       mulscc  %%g1,%3,%%g1\n"                                         \
1250 "       mulscc  %%g1,%3,%%g1\n"                                         \
1251 "       mulscc  %%g1,%3,%%g1\n"                                         \
1252 "       mulscc  %%g1,0,%%g1\n"                                          \
1253 "       add     %%g1,%%o5,%0\n"                                         \
1254 "       rd      %%y,%1"                                                 \
1255            : "=r" ((USItype) (w1)),                                     \
1256              "=r" ((USItype) (w0))                                      \
1257            : "%rI" ((USItype) (u)),                                     \
1258              "r" ((USItype) (v))                                                \
1259            : "g1", "o5" __AND_CLOBBER_CC)
1260 #define UMUL_TIME 39            /* 39 instructions */
1261 /* It's quite necessary to add this much assembler for the sparc.
1262    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Two-word by one-word unsigned division as a bit-serial loop.
   Operand wiring: %0 is __Q and starts out holding __N0 (matching
   constraint "0"), %1 is __R and starts out holding __N1 (matching
   constraint "1"), %2 is __D.  %g1 is loaded with 32 and decremented by
   subcc as the loop counter.  Each pass compares %1 against %2, shifts
   %0 left while bringing a bit in through the carry, and conditionally
   subtracts %2 from %1; the final xnor with 0 complements %0 to give
   the quotient.  %g1 and the condition codes are clobbered.  The
   instructions that follow each branch sit in its delay slot (the asm
   text indents them by one extra space), so their order must not be
   changed.  */
1263 #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1264   __asm__ ("! Inlined udiv_qrnnd\n"                                     \
1265 "       mov     32,%%g1\n"                                              \
1266 "       subcc   %1,%2,%%g0\n"                                           \
1267 "1:     bcs     5f\n"                                                   \
1268 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1269 "       sub     %1,%2,%1        ! this kills msb of n\n"                \
1270 "       addx    %1,%1,%1        ! so this can't give carry\n"           \
1271 "       subcc   %%g1,1,%%g1\n"                                          \
1272 "2:     bne     1b\n"                                                   \
1273 "        subcc  %1,%2,%%g0\n"                                           \
1274 "       bcs     3f\n"                                                   \
1275 "        addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb\n"      \
1276 "       b       3f\n"                                                   \
1277 "        sub    %1,%2,%1        ! this kills msb of n\n"                \
1278 "4:     sub     %1,%2,%1\n"                                             \
1279 "5:     addxcc  %1,%1,%1\n"                                             \
1280 "       bcc     2b\n"                                                   \
1281 "        subcc  %%g1,1,%%g1\n"                                          \
1282 "! Got carry from n.  Subtract next step to cancel this carry.\n"       \
1283 "       bne     4b\n"                                                   \
1284 "        addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n"      \
1285 "       sub     %1,%2,%1\n"                                             \
1286 "3:     xnor    %0,0,%0\n"                                              \
1287 "       ! End of inline udiv_qrnnd"                                     \
1288            : "=&r" ((USItype) (__q)),                                   \
1289              "=&r" ((USItype) (__r))                                    \
1290            : "r" ((USItype) (__d)),                                     \
1291              "1" ((USItype) (__n1)),                                    \
1292              "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1293 #define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
1294 #endif /* __sparclite__ */
1295 #endif /* __sparc_v8__ */
1296 #endif /* sparc32 */
1297
1298 #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1299     && W_TYPE_SIZE == 64
/* Two-word (2 x 64-bit) addition (SH,SL) = (AH,AL) + (BH,BL) for
   64-bit SPARC.  addcc adds the low words into SL and sets the
   condition codes; a plain add sums the high words into SH; then a
   branch on carry set in %xcc, with an annulled delay slot, adds 1 to
   SH only when the low-word addition carried.  SL is early-clobber
   because it is written before the high-word inputs are read.  */
1300 #define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
1301   __asm__ ("addcc %r4,%5,%1\n\t"                                        \
1302            "add %r2,%3,%0\n\t"                                          \
1303            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1304            "add %0, 1, %0\n"                                            \
1305            "1:"                                                         \
1306            : "=r" ((UDItype)(sh)),                                      \
1307              "=&r" ((UDItype)(sl))                                      \
1308            : "%rJ" ((UDItype)(ah)),                                     \
1309              "rI" ((UDItype)(bh)),                                      \
1310              "%rJ" ((UDItype)(al)),                                     \
1311              "rI" ((UDItype)(bl))                                       \
1312            __CLOBBER_CC)
1313
/* Two-word (2 x 64-bit) subtraction (SH,SL) = (AH,AL) - (BH,BL) for
   64-bit SPARC.  subcc subtracts the low words into SL and sets the
   condition codes; a plain sub computes the high-word difference into
   SH; then a branch on carry set in %xcc, with an annulled delay slot,
   subtracts 1 from SH only when the low-word subtraction borrowed.  SL
   is early-clobber because it is written before the high-word inputs
   are read.  */
1314 #define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
1315   __asm__ ("subcc %r4,%5,%1\n\t"                                        \
1316            "sub %r2,%3,%0\n\t"                                          \
1317            "bcs,a,pn %%xcc, 1f\n\t"                                     \
1318            "sub %0, 1, %0\n\t"                                          \
1319            "1:"                                                         \
1320            : "=r" ((UDItype)(sh)),                                      \
1321              "=&r" ((UDItype)(sl))                                      \
1322            : "rJ" ((UDItype)(ah)),                                      \
1323              "rI" ((UDItype)(bh)),                                      \
1324              "rJ" ((UDItype)(al)),                                      \
1325              "rI" ((UDItype)(bl))                                       \
1326            __CLOBBER_CC)
1327
/* 64 x 64 -> 128 bit unsigned multiply for 64-bit SPARC: (WH,WL)
   receives the product of U and V.  The asm splits both operands into
   32-bit halves (srl by 0 keeps the low half, srlx by 32 the high
   half), forms partial products with four mulx instructions and
   combines them, using addcc / movcc on %xcc to carry between the
   middle terms.  tmp1..tmp4 are early-clobber scratch outputs; WL is
   early-clobber as well.  The asm is marked __volatile__ and clobbers
   the condition codes.  */
1328 #define umul_ppmm(wh, wl, u, v)                                         \
1329   do {                                                                  \
1330           UDItype tmp1, tmp2, tmp3, tmp4;                               \
1331           __asm__ __volatile__ (                                        \
1332                    "srl %7,0,%3\n\t"                                    \
1333                    "mulx %3,%6,%1\n\t"                                  \
1334                    "srlx %6,32,%2\n\t"                                  \
1335                    "mulx %2,%3,%4\n\t"                                  \
1336                    "sllx %4,32,%5\n\t"                                  \
1337                    "srl %6,0,%3\n\t"                                    \
1338                    "sub %1,%5,%5\n\t"                                   \
1339                    "srlx %5,32,%5\n\t"                                  \
1340                    "addcc %4,%5,%4\n\t"                                 \
1341                    "srlx %7,32,%5\n\t"                                  \
1342                    "mulx %3,%5,%3\n\t"                                  \
1343                    "mulx %2,%5,%5\n\t"                                  \
1344                    "sethi %%hi(0x80000000),%2\n\t"                      \
1345                    "addcc %4,%3,%4\n\t"                                 \
1346                    "srlx %4,32,%4\n\t"                                  \
1347                    "add %2,%2,%2\n\t"                                   \
1348                    "movcc %%xcc,%%g0,%2\n\t"                            \
1349                    "addcc %5,%4,%5\n\t"                                 \
1350                    "sllx %3,32,%3\n\t"                                  \
1351                    "add %1,%3,%1\n\t"                                   \
1352                    "add %5,%2,%0"                                       \
1353            : "=r" ((UDItype)(wh)),                                      \
1354              "=&r" ((UDItype)(wl)),                                     \
1355              "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)     \
1356            : "r" ((UDItype)(u)),                                        \
1357              "r" ((UDItype)(v))                                         \
1358            __CLOBBER_CC);                                               \
1359   } while (0)
/* Timing hints for multiply and divide on this target.  */
1360 #define UMUL_TIME 96
1361 #define UDIV_TIME 230
1362 #endif /* sparc64 */
1363
1364 #if defined (__vax__) && W_TYPE_SIZE == 32
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for VAX.  SH and SL
   start out holding AH and AL through the "%0"/"%1" matching
   constraints; addl2 adds BL into SL and adwc then adds BH into SH
   (adwc presumably includes the carry -- confirm against the VAX
   manual).  SL is early-clobber.  */
1365 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1366   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                  \
1367            : "=g" ((USItype) (sh)),                                     \
1368              "=&g" ((USItype) (sl))                                     \
1369            : "%0" ((USItype) (ah)),                                     \
1370              "g" ((USItype) (bh)),                                      \
1371              "%1" ((USItype) (al)),                                     \
1372              "g" ((USItype) (bl)))
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for VAX.  SH and SL
   start out holding AH and AL through the "0"/"1" matching
   constraints; subl2 subtracts BL from SL and sbwc then subtracts BH
   from SH (sbwc presumably includes the borrow -- confirm against the
   VAX manual).  SL is early-clobber.  */
1373 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1374   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"                                  \
1375            : "=g" ((USItype) (sh)),                                     \
1376              "=&g" ((USItype) (sl))                                     \
1377            : "0" ((USItype) (ah)),                                      \
1378              "g" ((USItype) (bh)),                                      \
1379              "1" ((USItype) (al)),                                      \
1380              "g" ((USItype) (bl)))
/* Unsigned widening multiply for VAX: (XH,XL) receives the product of
   M0 and M1.  emul multiplies the two operands (with an addend of 0)
   into the 64-bit union member __xx.__ll, whose low and high 32-bit
   halves are then read back through __xx.__i.  Afterwards XH is
   adjusted by adding __m1 when the top bit of __m0 is set and __m0
   when the top bit of __m1 is set.
   NOTE(review): the adjustment presumably converts the high word of a
   signed product into the unsigned one -- confirm emul's signedness
   against the VAX manual.  */
1381 #define umul_ppmm(xh, xl, m0, m1) \
1382   do {                                                                  \
1383     union {                                                             \
1384         UDItype __ll;                                                   \
1385         struct {USItype __l, __h;} __i;                                 \
1386       } __xx;                                                           \
1387     USItype __m0 = (m0), __m1 = (m1);                                   \
1388     __asm__ ("emul %1,%2,$0,%0"                                         \
1389              : "=r" (__xx.__ll)                                         \
1390              : "g" (__m0),                                              \
1391                "g" (__m1));                                             \
1392     (xh) = __xx.__i.__h;                                                \
1393     (xl) = __xx.__i.__l;                                                \
1394     (xh) += ((((SItype) __m0 >> 31) & __m1)                             \
1395              + (((SItype) __m1 >> 31) & __m0));                         \
1396   } while (0)
/* Signed two-word by one-word division for VAX.  N1 and N0 are packed
   into the high and low halves of a 64-bit union, which ediv divides
   by D to produce the quotient Q and the remainder R.
   NOTE(review): N1 and N0 are used unparenthesized as the right-hand
   side of assignments, so callers should pass simple expressions.  */
1397 #define sdiv_qrnnd(q, r, n1, n0, d) \
1398   do {                                                                  \
1399     union {DItype __ll;                                                 \
1400            struct {SItype __l, __h;} __i;                               \
1401           } __xx;                                                       \
1402     __xx.__i.__h = n1; __xx.__i.__l = n0;                               \
1403     __asm__ ("ediv %3,%2,%0,%1"                                         \
1404              : "=g" (q), "=g" (r)                                       \
1405              : "g" (__xx.__ll), "g" (d));                               \
1406   } while (0)
1407 #endif /* __vax__ */
1408
1409 #ifdef _TMS320C6X
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for TMS320C6X.  The
   addu instruction adds AL and BL into the 64-bit temporary __ll.  SL
   takes the low 32 bits of that sum; the bits above bit 31 (the carry
   out of the low-word addition) are added to AH + BH to form SH.  */
1410 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1411   do                                                                    \
1412     {                                                                   \
1413       UDItype __ll;                                                     \
1414       __asm__ ("addu .l1 %1, %2, %0"                                    \
1415                : "=a" (__ll) : "a" (al), "a" (bl));                     \
1416       (sl) = (USItype)__ll;                                             \
1417       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);                     \
1418     }                                                                   \
1419   while (0)
1420
1421 #ifdef _TMS320C6400_PLUS
/* Widening unsigned multiply in plain C: yields the full 64-bit
   (UDItype) product of U and V, each first converted to the 32-bit
   USItype.  Both arguments are parenthesized so that an expression
   argument such as `a + b' is converted and multiplied as a whole
   rather than being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* Unsigned widening multiply in plain C: the 64-bit product of the
   32-bit values U and V is split into its upper 32 bits (stored in W1)
   and its lower 32 bits (stored in W0).  */
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    USItype __mul_a = (u);						\
    USItype __mul_b = (v);						\
    UDItype __prod = (UDItype) __mul_a * __mul_b;			\
    (w1) = (USItype) (__prod >> 32);					\
    (w0) = (USItype) (__prod);						\
  } while (0)
1429 #endif  /* _TMS320C6400_PLUS */
1430
/* Set COUNT to __builtin_clz (X).  The macro is a single assignment
   expression, so it can also be used where an expression is needed.  */
1431 #define count_leading_zeros(count, x)   ((count) = __builtin_clz (x))
1432 #ifdef _TMS320C6400
/* Set COUNT to __builtin_ctz (X); only provided when _TMS320C6400 is
   defined.  */
1433 #define count_trailing_zeros(count, x)  ((count) = __builtin_ctz (x))
1434 #endif
/* Timing hints for multiply and divide on this target.  */
1435 #define UMUL_TIME 4
1436 #define UDIV_TIME 40
1437 #endif /* _TMS320C6X */
1438
1439 #if defined (__xtensa__) && W_TYPE_SIZE == 32
1440 /* This code is not Xtensa-configuration-specific, so rely on the compiler
1441    to expand builtin functions depending on what configuration features
1442    are available.  This avoids library calls when the operation can be
1443    performed in-line.  */
/* Unsigned widening multiply through the compiler builtin: the
   double-word product of U and V is stored into a DWunion and then
   split into its high word (W1) and its low word (W0).  */
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    DWunion __prod;							\
    __prod.ll = __builtin_umulsidi3 (u, v);				\
    (w1) = __prod.s.high;						\
    (w0) = __prod.s.low;						\
  } while (0)
/* Thin forwarding macros: each one hands its arguments to the compiler
   builtin of the matching name.  The two count macros are single
   assignment expressions storing the builtin's result in COUNT.  */
1451 #define __umulsidi3(u, v)               __builtin_umulsidi3 (u, v)
1452 #define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz (X))
1453 #define count_trailing_zeros(COUNT, X)  ((COUNT) = __builtin_ctz (X))
1454 #endif /* __xtensa__ */
1455
1456 #if defined xstormy16
/* Declaration of an out-of-line helper taking and returning a 16-bit
   value.  NOTE(review): the count_leading_zeros macro that follows
   calls __clzhi2, not this function -- confirm whether this
   declaration is still needed.  */
1457 extern UHItype __stormy16_count_leading_zeros (UHItype);
/* Set COUNT to the number of leading zero bits of X by scanning X in
   16-bit chunks from the most significant end.  Each chunk is handed
   to __clzhi2; its result is accumulated into COUNT, and the scan
   stops at the first chunk that is not entirely zero (a result other
   than 16).  W_TYPE_SIZE is assumed to be a multiple of 16.  X is
   evaluated once per chunk examined.  */
#define count_leading_zeros(count, x)					\
  do									\
    {									\
      UHItype __bits_left = W_TYPE_SIZE;				\
      UHItype __chunk_zeros;						\
									\
      (count) = 0;							\
      while (__bits_left)						\
	{								\
	  __chunk_zeros = __clzhi2 ((x) >> (__bits_left - 16));		\
	  (count) += __chunk_zeros;					\
	  if (__chunk_zeros != 16)					\
	    break;							\
	  __bits_left -= 16;						\
	}								\
    }									\
  while (0)
/* Value count_leading_zeros is declared to produce for X == 0: every
   16-bit chunk contributes 16, giving the full word size.  */
1475 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1476 #endif
1477
1478 #if defined (__z8000__) && W_TYPE_SIZE == 16
/* Two-word addition (SH,SL) = (AH,AL) + (BH,BL) for Z8000 with 16-bit
   words.  SH and SL start out holding AH and AL through the "%0"/"%1"
   matching constraints; add adds BL into SL and adc then adds BH into
   SH (adc presumably includes the carry -- confirm against the Z8000
   manual).  SL is early-clobber.  */
1479 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1480   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
1481            : "=r" ((unsigned int)(sh)),                                 \
1482              "=&r" ((unsigned int)(sl))                                 \
1483            : "%0" ((unsigned int)(ah)),                                 \
1484              "r" ((unsigned int)(bh)),                                  \
1485              "%1" ((unsigned int)(al)),                                 \
1486              "rQR" ((unsigned int)(bl)))
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL) for Z8000 with
   16-bit words.  SH and SL start out holding AH and AL through the
   "0"/"1" matching constraints; sub subtracts BL from SL and sbc then
   subtracts BH from SH (sbc presumably includes the borrow -- confirm
   against the Z8000 manual).  SL is early-clobber.  */
1487 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1488   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
1489            : "=r" ((unsigned int)(sh)),                                 \
1490              "=&r" ((unsigned int)(sl))                                 \
1491            : "0" ((unsigned int)(ah)),                                  \
1492              "r" ((unsigned int)(bh)),                                  \
1493              "1" ((unsigned int)(al)),                                  \
1494              "rQR" ((unsigned int)(bl)))
/* Unsigned widening multiply for Z8000 with 16-bit words: (XH,XL)
   receives the product of M0 and M1.  The mult instruction writes the
   __h and __l halves of the union, with __m0 supplied in the same
   operand as __l through the "%1" matching constraint.  Afterwards XH
   is adjusted by adding __m1 when bit 15 of __m0 is set and __m0 when
   bit 15 of __m1 is set.
   NOTE(review): the adjustment presumably converts the high word of a
   signed product into the unsigned one -- confirm mult's signedness
   against the Z8000 manual.  */
1495 #define umul_ppmm(xh, xl, m0, m1) \
1496   do {                                                                  \
1497     union {long int __ll;                                               \
1498            struct {unsigned int __h, __l;} __i;                         \
1499           } __xx;                                                       \
1500     unsigned int __m0 = (m0), __m1 = (m1);                              \
1501     __asm__ ("mult      %S0,%H3"                                        \
1502              : "=r" (__xx.__i.__h),                                     \
1503                "=r" (__xx.__i.__l)                                      \
1504              : "%1" (__m0),                                             \
1505                "rQR" (__m1));                                           \
1506     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                           \
1507     (xh) += ((((signed int) __m0 >> 15) & __m1)                         \
1508              + (((signed int) __m1 >> 15) & __m0));                     \
1509   } while (0)
1510 #endif /* __z8000__ */
1511
1512 #endif /* __GNUC__ */
1513
1514 /* If this machine has no inline assembler, use C macros.  */
1515
1516 #if !defined (add_ssaaaa)
/* Portable two-word addition: (SH,SL) = (AH,AL) + (BH,BL).  The low
   words are summed first; if that sum wrapped around (it is smaller
   than AL) a carry of 1 is added into the high-word sum.  SH is stored
   before SL, and AL is evaluated twice.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __lo_sum = (al) + (bl);					\
    (sh) = (ah) + (bh) + (__lo_sum < (al));				\
    (sl) = __lo_sum;							\
  } while (0)
1524 #endif
1525
1526 #if !defined (sub_ddmmss)
/* Portable two-word subtraction: (SH,SL) = (AH,AL) - (BH,BL).  The low
   words are subtracted first; if that difference wrapped around (it is
   larger than AL) a borrow of 1 is taken from the high-word
   difference.  SH is stored before SL, and AL is evaluated twice.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __lo_diff = (al) - (bl);					\
    (sh) = (ah) - (bh) - (__lo_diff > (al));				\
    (sl) = __lo_diff;							\
  } while (0)
1534 #endif
1535
/* No umul_ppmm so far, but a signed smul_ppmm exists: derive the
   unsigned product from the signed one.

   -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask when the top bit of X
   is set and zero otherwise, so each correction term below adds the
   other operand to the signed high word exactly when this operand has
   its top bit set.  The low word from smul_ppmm is stored unchanged.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __smul_u = (u), __smul_v = (v);                              \
    UWtype __smul_hi;                                                   \
    UWtype __smul_fix_u, __smul_fix_v;                                  \
    smul_ppmm (__smul_hi, w0, __smul_u, __smul_v);                      \
    __smul_fix_u = -(__smul_u >> (W_TYPE_SIZE - 1)) & __smul_v;         \
    __smul_fix_v = -(__smul_v >> (W_TYPE_SIZE - 1)) & __smul_u;         \
    (w1) = __smul_hi + __smul_fix_u + __smul_fix_v;                     \
  } while (0)
#endif
1548
/* If we still don't have umul_ppmm, define it using plain C.

   umul_ppmm (w1, w0, u, v) multiplies the words U and V and stores the
   high word of the double-word product in W1 and the low word in W0.
   Each operand is split into half-words with __ll_lowpart and
   __ll_highpart, and the four partial products are combined by hand.

   U and V are each evaluated exactly once, matching the smul_ppmm-based
   definition of this macro.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v)                                         \
  do {                                                                  \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (__umul_u);                                     \
    __uh = __ll_highpart (__umul_u);                                    \
    __vl = __ll_lowpart (__umul_v);                                     \
    __vh = __ll_highpart (__umul_v);                                    \
                                                                        \
    /* Four half-word by half-word partial products.  */                \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x1 = (UWtype) __ul * __vh;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
    __x3 = (UWtype) __uh * __vh;                                        \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif
1575
#if !defined (__umulsidi3)
/* __umulsidi3 (u, v) -- expression form of umul_ppmm.

   Runs umul_ppmm on U and V with the s.high and s.low members of a
   temporary DWunion as the two output words, then yields that union's
   `ll' member as the value of the whole expression.  Written as a GNU C
   statement expression, so it is only usable where that extension is
   available.  */
#define __umulsidi3(u, v) \
  ({                                                                    \
    DWunion __umulsidi3_res;                                            \
    umul_ppmm (__umulsidi3_res.s.high, __umulsidi3_res.s.low, u, v);    \
    __umulsidi3_res.ll;                                                 \
  })
#endif
1582
/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d) divides the two-word value (n1,n0)
   by the word D, storing the quotient in Q and the remainder in R.  The
   quotient is produced as two half-word digits, each estimated by
   dividing by the high half of D and then corrected at most twice.

   The high half of D must be non-zero, since it is used as a divisor.
   NOTE(review): callers are presumably expected to pass a D with its
   most significant bit set and N1 < D (this is the definition selected
   together with UDIV_NEEDS_NORMALIZATION 1) -- confirm against the
   interface description at the top of the file.

   N1, N0 and D are each evaluated exactly once; Q and R are written
   last.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __udiv_n1 = (n1), __udiv_n0 = (n0), __udiv_d = (d);          \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (__udiv_d);                                    \
    __d0 = __ll_lowpart (__udiv_d);                                     \
                                                                        \
    /* High quotient digit: estimate from the high half of D.  */       \
    __r1 = __udiv_n1 % __d1;                                            \
    __q1 = __udiv_n1 / __d1;                                            \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0);                   \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __udiv_d;                                       \
        if (__r1 >= __udiv_d) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m)                                               \
            __q1--, __r1 += __udiv_d;                                   \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low quotient digit: same steps on the partial remainder.  */     \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0);                    \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __udiv_d;                                       \
        if (__r0 >= __udiv_d)                                           \
          if (__r0 < __m)                                               \
            __q0--, __r0 += __udiv_d;                                   \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)
1620
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   udiv_qrnnd (q, r, nh, nl, d) stores the value returned by
   __udiv_w_sdiv in Q, and the word that __udiv_w_sdiv writes through
   its first (pointer) argument in R.  The temporary receiving that word
   is a UWtype, like every other generic fallback in this section, so
   its width follows the configured word size rather than being fixed
   at 32 bits.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif
1631
/* Last resort: nothing earlier defined udiv_qrnnd, so make it an alias
   for the portable __udiv_qrnnd_c and set UDIV_NEEDS_NORMALIZATION to 1
   to go with that implementation.  */
#if !defined (udiv_qrnnd)
#define udiv_qrnnd __udiv_qrnnd_c
#define UDIV_NEEDS_NORMALIZATION 1
#endif
1637
#if !defined (count_leading_zeros)
/* count_leading_zeros (count, x) -- portable C fallback.

   Picks a shift amount that brings the highest non-zero chunk of X down
   to the bottom of the word, looks that chunk up in __clz_tab, and
   stores W_TYPE_SIZE - (table value + shift) in COUNT.
   NOTE(review): __clz_tab[i] is presumably the bit length of i --
   confirm against its definition.

   For words of at most 32 bits the shift is one of 0, 1, 2 or 3 times
   __BITS4, chosen by comparing X against the matching powers of two.
   For wider words the shift steps down from W_TYPE_SIZE - 8 in units of
   8 until a non-zero byte is found or the shift reaches 0.

   COUNT_LEADING_ZEROS_0 is defined as W_TYPE_SIZE alongside this
   definition.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_x = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        if (__clz_x < ((UWtype) 1 << __BITS4))                          \
          __clz_sh = 0;                                                 \
        else if (__clz_x < ((UWtype) 1 << 2 * __BITS4))                 \
          __clz_sh = __BITS4;                                           \
        else if (__clz_x < ((UWtype) 1 << 3 * __BITS4))                 \
          __clz_sh = 2 * __BITS4;                                       \
        else                                                            \
          __clz_sh = 3 * __BITS4;                                       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_x >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__clz_x >> __clz_sh] + __clz_sh);\
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1661
#if !defined (count_trailing_zeros)
/* count_trailing_zeros (count, x) -- built on count_leading_zeros, which
   may be an asm definition or the C fallback.

   X & -X keeps only the lowest set bit of X.  The number of leading
   zeros of that value is taken with count_leading_zeros, and COUNT is
   set to W_TYPE_SIZE - 1 minus that number.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_in = (x);                                              \
    UWtype __ctz_low = __ctz_in & -__ctz_in;                            \
    UWtype __ctz_lead;                                                  \
    count_leading_zeros (__ctz_lead, __ctz_low);                        \
    (count) = W_TYPE_SIZE - 1 - __ctz_lead;                             \
  } while (0)
#endif
1673
/* Default UDIV_NEEDS_NORMALIZATION to 0 when nothing earlier defined it,
   so the macro is always defined past this point.  */
#if !defined (UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif