contrib/gmp/gmp-impl.h

   1 /* Include file for internal GNU MP types and definitions.
   2
   3    THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   4    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
   5
   6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
   7 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU Lesser General Public License as published by
  13 the Free Software Foundation; either version 3 of the License, or (at your
  14 option) any later version.
  15
  16 The GNU MP Library is distributed in the hope that it will be useful, but
  17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  19 License for more details.
  20
  21 You should have received a copy of the GNU Lesser General Public License
  22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  23
  24
  25 /* __GMP_DECLSPEC must be given on any global data that will be accessed
  26    from outside libgmp, meaning from the test or development programs, or
  27    from libgmpxx.  Failing to do this will result in an incorrect address
  28    being used for the accesses.  On functions __GMP_DECLSPEC makes calls
  29    from outside libgmp more efficient, but they'll still work fine without
  30    it.  */
  31
  32
  33 #ifndef __GMP_IMPL_H__
  34 #define __GMP_IMPL_H__
  35
  36 #if defined _CRAY
  37 #include <intrinsics.h>  /* for _popcnt */
  38 #endif
  39
  40 /* limits.h is not used in general, since it's an ANSI-ism, and since on
  41    solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
  42    values (the ABI=64 values).
  43
  44    On Cray vector systems, however, we need the system limits.h since sizes
  45    of signed and unsigned types can differ there, depending on compiler
  46    options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
  47    reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
  48    short can be 24, 32, 46 or 64 bits, and different for ushort.  */
  49
  50 #if defined _CRAY
  51 #include <limits.h>
  52 #endif
  53
  54 /* For fat.h and other fat binary stuff.
  55    No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
  56    declared this way are only used to set function pointers in __gmp_cpuvec,
  57    they're not called directly.  */
  58 #define DECL_add_n(name) \
  59   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
  60 #define DECL_addmul_1(name) \
  61   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  62 #define DECL_copyd(name) \
  63   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  64 #define DECL_copyi(name) \
  65   DECL_copyd (name)
  66 #define DECL_divexact_1(name) \
  67   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  68 #define DECL_divexact_by3c(name) \
  69   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  70 #define DECL_divrem_1(name) \
  71   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
  72 #define DECL_gcd_1(name) \
  73   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  74 #define DECL_lshift(name) \
  75   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
  76 #define DECL_mod_1(name) \
  77   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  78 #define DECL_mod_34lsub1(name) \
  79   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
  80 #define DECL_modexact_1c_odd(name) \
  81   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  82 #define DECL_mul_1(name) \
  83   DECL_addmul_1 (name)
  84 #define DECL_mul_basecase(name) \
  85   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
  86 #define DECL_preinv_divrem_1(name) \
  87   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
  88 #define DECL_preinv_mod_1(name) \
  89   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  90 #define DECL_rshift(name) \
  91   DECL_lshift (name)
  92 #define DECL_sqr_basecase(name) \
  93   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  94 #define DECL_sub_n(name) \
  95   DECL_add_n (name)
  96 #define DECL_submul_1(name) \
  97   DECL_addmul_1 (name)
  98
  99 #if ! __GMP_WITHIN_CONFIGURE
 100 #include "config.h"
 101 #include "gmp-mparam.h"
 102 #include "fib_table.h"
 103 #include "mp_bases.h"
 104 #if WANT_FAT_BINARY
 105 #include "fat.h"
 106 #endif
 107 #endif
 108
 109 #if HAVE_INTTYPES_H      /* for uint_least32_t */
 110 # include <inttypes.h>
 111 #else
 112 # if HAVE_STDINT_H
 113 #  include <stdint.h>
 114 # endif
 115 #endif
 116
 117 #ifdef __cplusplus
 118 #include <cstring>  /* for strlen */
 119 #include <string>   /* for std::string */
 120 #endif
 121
 122
 123 #ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
 124 #define WANT_TMP_DEBUG 0
 125 #endif
 126
 127 /* The following tries to get a good version of alloca.  The tests are
 128    adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
 129    Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
 130    be setup appropriately.
 131
 132    ifndef alloca - a cpp define might already exist.
 133        glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
 134        HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
 135
 136    GCC __builtin_alloca - preferred whenever available.
 137
 138    _AIX pragma - IBM compilers need a #pragma in "each module that needs to
 139        use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
 140        used in past versions of GMP, retained still in case it matters.
 141
 142        The autoconf manual says this pragma needs to be at the start of a C
 143        file, apart from comments and preprocessor directives.  Is that true?
 144        xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
 145        from gmp.h.
 146 */
 147
 148 #ifndef alloca
 149 # ifdef __GNUC__
 150 #  define alloca __builtin_alloca
 151 # else
 152 #  ifdef __DECC
 153 #   define alloca(x) __ALLOCA(x)
 154 #  else
 155 #   ifdef _MSC_VER
 156 #    include <malloc.h>
 157 #    define alloca _alloca
 158 #   else
 159 #    if HAVE_ALLOCA_H
 160 #     include <alloca.h>
 161 #    else
 162 #     if defined (_AIX) || defined (_IBMR2)
 163  #pragma alloca
 164 #     else
 165        char *alloca ();
 166 #     endif
 167 #    endif
 168 #   endif
 169 #  endif
 170 # endif
 171 #endif
 172
 173
 174 /* if not provided by gmp-mparam.h */
 175 #ifndef BYTES_PER_MP_LIMB
 176 #define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
 177 #endif
 178 #ifndef BITS_PER_MP_LIMB
 179 #define BITS_PER_MP_LIMB  (8 * SIZEOF_MP_LIMB_T)
 180 #endif
 181
 182 #define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
 183
 184
 185 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
 186 #if HAVE_UINT_LEAST32_T
 187 typedef uint_least32_t      gmp_uint_least32_t;
 188 #else
 189 #if SIZEOF_UNSIGNED_SHORT >= 4
 190 typedef unsigned short      gmp_uint_least32_t;
 191 #else
 192 #if SIZEOF_UNSIGNED >= 4
 193 typedef unsigned            gmp_uint_least32_t;
 194 #else
 195 typedef unsigned long       gmp_uint_least32_t;
 196 #endif
 197 #endif
 198 #endif
 199
 200
 201 /* const and signed must match __gmp_const and __gmp_signed, so follow the
 202    decision made for those in gmp.h.    */
 203 #if ! __GMP_HAVE_CONST
 204 #define const   /* empty */
 205 #define signed  /* empty */
 206 #endif
 207
 208 /* "const" basically means a function does nothing but examine its arguments
 209    and give a return value, it doesn't read or write any memory (neither
 210    global nor pointed to by arguments), and has no other side-effects.  This
 211    is more restrictive than "pure".  See info node "(gcc)Function
 212    Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
 213    this off when trying to write timing loops.  */
 214 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
 215 #define ATTRIBUTE_CONST  __attribute__ ((const))
 216 #else
 217 #define ATTRIBUTE_CONST
 218 #endif
 219
 220 #if HAVE_ATTRIBUTE_NORETURN
 221 #define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
 222 #else
 223 #define ATTRIBUTE_NORETURN
 224 #endif
 225
 226 /* "malloc" means a function behaves like malloc in that the pointer it
 227    returns doesn't alias anything.  */
 228 #if HAVE_ATTRIBUTE_MALLOC
 229 #define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
 230 #else
 231 #define ATTRIBUTE_MALLOC
 232 #endif
 233
 234
 235 #if ! HAVE_STRCHR
 236 #define strchr(s,c)  index(s,c)
 237 #endif
 238
 239 #if ! HAVE_MEMSET
 240 #define memset(p, c, n)                 \
 241   do {                                  \
 242     ASSERT ((n) >= 0);                  \
 243     char *__memset__p = (p);            \
 244     int  __i;                           \
 245     for (__i = 0; __i < (n); __i++)     \
 246       __memset__p[__i] = (c);           \
 247   } while (0)
 248 #endif
 249
 250 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
 251    mode.  Falling back to a memcpy will give maximum portability, since it
 252    works no matter whether va_list is a pointer, struct or array.  */
 253 #if ! defined (va_copy) && defined (__va_copy)
 254 #define va_copy(dst,src)  __va_copy(dst,src)
 255 #endif
 256 #if ! defined (va_copy)
 257 #define va_copy(dst,src) \
 258   do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
 259 #endif
 260
 261
 262 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
 263    (ie. ctlz, ctpop, cttz).  */
 264 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
 265   || HAVE_HOST_CPU_alphaev7
 266 #define HAVE_HOST_CPU_alpha_CIX 1
 267 #endif
 268
 269
 270 #if defined (__cplusplus)
 271 extern "C" {
 272 #endif
 273
 274
 275 /* Usage: TMP_DECL;
 276           TMP_MARK;
 277           ptr = TMP_ALLOC (bytes);
 278           TMP_FREE;
 279
 280    Small allocations should use TMP_SALLOC, big allocations should use
 281    TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
 282
 283    Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
 284    TMP_SFREE.
 285
 286    TMP_DECL just declares a variable, but might be empty and so must be last
 287    in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
 288    TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
 289    TMP_MARK was made, but then no TMP_ALLOCs.  */
 290
 291 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
 292    __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
 293    isn't used for "double"s, so that's not in the union.  */
 294 union tmp_align_t {
 295   mp_limb_t  l;
 296   char       *p;
 297 };
 298 #define __TMP_ALIGN  sizeof (union tmp_align_t)
 299
 300 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
 301    "a" must be an unsigned type.
 302    This is designed for use with a compile-time constant "m".
 303    The POW2 case is expected to be usual, and gcc 3.0 and up recognises
 304    "(-(8*n))%8" or the like is always zero, which means the rounding up in
 305    the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
 306 #define ROUND_UP_MULTIPLE(a,m)          \
 307   (POW2_P(m) ? (a) + (-(a))%(m)         \
 308    : (a)+(m)-1 - (((a)+(m)-1) % (m)))
 309
 310 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
 311 struct tmp_reentrant_t {
 312   struct tmp_reentrant_t  *next;
 313   size_t                  size;   /* bytes, including header */
 314 };
 315 void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
 316 void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
 317 #endif
 318
 319 #if WANT_TMP_ALLOCA
 320 #define TMP_SDECL
 321 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 322 #define TMP_SMARK
 323 #define TMP_MARK                __tmp_marker = 0
 324 #define TMP_SALLOC(n)           alloca(n)
 325 #define TMP_BALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 326 #define TMP_ALLOC(n)                                                    \
 327   (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
 328 #define TMP_SFREE
 329 #define TMP_FREE                                                           \
 330   do {                                                                     \
 331     if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
 332   } while (0)
 333 #endif
 334
 335 #if WANT_TMP_REENTRANT
 336 #define TMP_SDECL               TMP_DECL
 337 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 338 #define TMP_SMARK               TMP_MARK
 339 #define TMP_MARK                __tmp_marker = 0
 340 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 341 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 342 #define TMP_ALLOC(n)            __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 343 #define TMP_SFREE               TMP_FREE
 344 #define TMP_FREE                __gmp_tmp_reentrant_free (__tmp_marker)
 345 #endif
 346
 347 #if WANT_TMP_NOTREENTRANT
 348 struct tmp_marker
 349 {
 350   struct tmp_stack *which_chunk;
 351   void *alloc_point;
 352 };
 353 void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
 354 void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
 355 void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
 356 #define TMP_SDECL               TMP_DECL
 357 #define TMP_DECL                struct tmp_marker __tmp_marker
 358 #define TMP_SMARK               TMP_MARK
 359 #define TMP_MARK                __gmp_tmp_mark (&__tmp_marker)
 360 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 361 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 362 #define TMP_ALLOC(n)                                                    \
 363   __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
 364 #define TMP_SFREE               TMP_FREE
 365 #define TMP_FREE                __gmp_tmp_free (&__tmp_marker)
 366 #endif
 367
 368 #if WANT_TMP_DEBUG
 369 /* See tal-debug.c for some comments. */
 370 struct tmp_debug_t {
 371   struct tmp_debug_entry_t  *list;
 372   const char                *file;
 373   int                       line;
 374 };
 375 struct tmp_debug_entry_t {
 376   struct tmp_debug_entry_t  *next;
 377   char                      *block;
 378   size_t                    size;
 379 };
 380 void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
 381                                      struct tmp_debug_t *,
 382                                      const char *, const char *));
 383 void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
 384                                      struct tmp_debug_t **, const char *,
 385                                      size_t)) ATTRIBUTE_MALLOC;
 386 void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
 387                                      struct tmp_debug_t **,
 388                                      const char *, const char *));
 389 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 390 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 391 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 392 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 393 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 394 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 395 /* The marker variable is designed to provoke an uninitialized variable
 396    warning from the compiler if TMP_FREE is used without a TMP_MARK.
 397    __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
 398    these things up too.  */
 399 #define TMP_DECL_NAME(marker, marker_name)                      \
 400   int marker;                                                   \
 401   int __tmp_marker_inscope;                                     \
 402   const char *__tmp_marker_name = marker_name;                  \
 403   struct tmp_debug_t  __tmp_marker_struct;                      \
 404   /* don't demand NULL, just cast a zero */                     \
 405   struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
 406 #define TMP_MARK_NAME(marker, marker_name)                      \
 407   do {                                                          \
 408     marker = 1;                                                 \
 409     __tmp_marker_inscope = 1;                                   \
 410     __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
 411                            &__tmp_marker, &__tmp_marker_struct, \
 412                            __tmp_marker_name, marker_name);     \
 413   } while (0)
 414 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 415 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 416 #define TMP_ALLOC(size)                                                 \
 417   __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
 418                          __tmp_marker_inscope,                          \
 419                          &__tmp_marker, __tmp_marker_name, size)
 420 #define TMP_FREE_NAME(marker, marker_name)                      \
 421   do {                                                          \
 422     __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
 423                            marker, &__tmp_marker,               \
 424                            __tmp_marker_name, marker_name);     \
 425   } while (0)
 426 #endif /* WANT_TMP_DEBUG */
 427
 428
 429 /* Allocating various types. */
 430 #define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
 431 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
 432 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
 433 #define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
 434 #define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
 435 #define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
 436 #define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
 437 #define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
 438 #define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
 439
 440 /* It's more efficient to allocate one block than two.  This is certainly
 441    true of the malloc methods, but it can even be true of alloca if that
 442    involves copying a chunk of stack (various RISCs), or a call to a stack
 443    bounds check (mingw).  In any case, when debugging keep separate blocks
 444    so a redzoning malloc debugger can protect each individually.  */
 445 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
 446   do {                                                  \
 447     if (WANT_TMP_DEBUG)                                 \
 448       {                                                 \
 449         (xp) = TMP_ALLOC_LIMBS (xsize);                 \
 450         (yp) = TMP_ALLOC_LIMBS (ysize);                 \
 451       }                                                 \
 452     else                                                \
 453       {                                                 \
 454         (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
 455         (yp) = (xp) + (xsize);                          \
 456       }                                                 \
 457   } while (0)
 458
 459
 460 /* From gmp.h, nicer names for internal use. */
 461 #define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
 462 #define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
 463 #define LIKELY(cond)                   __GMP_LIKELY(cond)
 464 #define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
 465
 466 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 467 #undef MIN
 468 #define MIN(l,o) ((l) < (o) ? (l) : (o))
 469 #undef MAX
 470 #define MAX(h,i) ((h) > (i) ? (h) : (i))
 471 #define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
 472
 473 /* Field access macros.  */
 474 #define SIZ(x) ((x)->_mp_size)
 475 #define ABSIZ(x) ABS (SIZ (x))
 476 #define PTR(x) ((x)->_mp_d)
 477 #define LIMBS(x) ((x)->_mp_d)
 478 #define EXP(x) ((x)->_mp_exp)
 479 #define PREC(x) ((x)->_mp_prec)
 480 #define ALLOC(x) ((x)->_mp_alloc)
 481
 482 /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
 483    then that lowest one bit must have been the only bit set.  n==0 will
 484    return true though, so avoid that.  */
 485 #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
 486
 487
 488 /* The "short" defines are a bit different because shorts are promoted to
 489    ints by ~ or >> etc.
 490
 491    #ifndef's are used since on some systems (HP?) header files other than
 492    limits.h setup these defines.  We could forcibly #undef in that case, but
 493    there seems no need to worry about that.  */
 494
 495 #ifndef ULONG_MAX
 496 #define ULONG_MAX   __GMP_ULONG_MAX
 497 #endif
 498 #ifndef UINT_MAX
 499 #define UINT_MAX    __GMP_UINT_MAX
 500 #endif
 501 #ifndef USHRT_MAX
 502 #define USHRT_MAX   __GMP_USHRT_MAX
 503 #endif
 504 #define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
 505
 506 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
 507    unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
 508    treats the plain decimal values in <limits.h> as signed.  */
 509 #define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
 510 #define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
 511 #define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
 512 #define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
 513
 514 #ifndef LONG_MIN
 515 #define LONG_MIN           ((long) ULONG_HIGHBIT)
 516 #endif
 517 #ifndef LONG_MAX
 518 #define LONG_MAX           (-(LONG_MIN+1))
 519 #endif
 520
 521 #ifndef INT_MIN
 522 #define INT_MIN            ((int) UINT_HIGHBIT)
 523 #endif
 524 #ifndef INT_MAX
 525 #define INT_MAX            (-(INT_MIN+1))
 526 #endif
 527
 528 #ifndef SHRT_MIN
 529 #define SHRT_MIN           ((short) USHRT_HIGHBIT)
 530 #endif
 531 #ifndef SHRT_MAX
 532 #define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
 533 #endif
 534
 535 #if __GMP_MP_SIZE_T_INT
 536 #define MP_SIZE_T_MAX      INT_MAX
 537 #define MP_SIZE_T_MIN      INT_MIN
 538 #else
 539 #define MP_SIZE_T_MAX      LONG_MAX
 540 #define MP_SIZE_T_MIN      LONG_MIN
 541 #endif
 542
 543 /* mp_exp_t is the same as mp_size_t */
 544 #define MP_EXP_T_MAX   MP_SIZE_T_MAX
 545 #define MP_EXP_T_MIN   MP_SIZE_T_MIN
 546
 547 #define LONG_HIGHBIT       LONG_MIN
 548 #define INT_HIGHBIT        INT_MIN
 549 #define SHRT_HIGHBIT       SHRT_MIN
 550
 551
 552 #define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
 553
 554 #if GMP_NAIL_BITS == 0
 555 #define GMP_NAIL_LOWBIT   CNST_LIMB(0)
 556 #else
 557 #define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
 558 #endif
 559
 560 #if GMP_NAIL_BITS != 0
 561 /* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
 562    code that has not yet been qualified.  */
 563
 564 #undef DIV_SB_PREINV_THRESHOLD
 565 #undef DIV_DC_THRESHOLD
 566 #undef POWM_THRESHOLD
 567 #define DIV_SB_PREINV_THRESHOLD           MP_SIZE_T_MAX
 568 #define DIV_DC_THRESHOLD                 50
 569 #define POWM_THRESHOLD                    0
 570
 571 #undef GCD_ACCEL_THRESHOLD
 572 #define GCD_ACCEL_THRESHOLD               3
 573
 574 #undef DIVREM_1_NORM_THRESHOLD
 575 #undef DIVREM_1_UNNORM_THRESHOLD
 576 #undef MOD_1_NORM_THRESHOLD
 577 #undef MOD_1_UNNORM_THRESHOLD
 578 #undef USE_PREINV_DIVREM_1
 579 #undef USE_PREINV_MOD_1
 580 #undef DIVREM_2_THRESHOLD
 581 #undef DIVEXACT_1_THRESHOLD
 582 #undef MODEXACT_1_ODD_THRESHOLD
 583 #define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
 584 #define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
 585 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
 586 #define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
 587 #define USE_PREINV_DIVREM_1               0  /* no preinv */
 588 #define USE_PREINV_MOD_1                  0  /* no preinv */
 589 #define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
 590
 591 /* mpn/generic/mul_fft.c is not nails-capable. */
 592 #undef  MUL_FFT_THRESHOLD
 593 #undef  SQR_FFT_THRESHOLD
 594 #define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
 595 #define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
 596 #endif
 597
 598 /* Swap macros. */
 599
 600 #define MP_LIMB_T_SWAP(x, y)                    \
 601   do {                                          \
 602     mp_limb_t __mp_limb_t_swap__tmp = (x);      \
 603     (x) = (y);                                  \
 604     (y) = __mp_limb_t_swap__tmp;                \
 605   } while (0)
 606 #define MP_SIZE_T_SWAP(x, y)                    \
 607   do {                                          \
 608     mp_size_t __mp_size_t_swap__tmp = (x);      \
 609     (x) = (y);                                  \
 610     (y) = __mp_size_t_swap__tmp;                \
 611   } while (0)
 612
 613 #define MP_PTR_SWAP(x, y)               \
 614   do {                                  \
 615     mp_ptr __mp_ptr_swap__tmp = (x);    \
 616     (x) = (y);                          \
 617     (y) = __mp_ptr_swap__tmp;           \
 618   } while (0)
 619 #define MP_SRCPTR_SWAP(x, y)                    \
 620   do {                                          \
 621     mp_srcptr __mp_srcptr_swap__tmp = (x);      \
 622     (x) = (y);                                  \
 623     (y) = __mp_srcptr_swap__tmp;                \
 624   } while (0)
 625
 626 #define MPN_PTR_SWAP(xp,xs, yp,ys)      \
 627   do {                                  \
 628     MP_PTR_SWAP (xp, yp);               \
 629     MP_SIZE_T_SWAP (xs, ys);            \
 630   } while(0)
 631 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
 632   do {                                  \
 633     MP_SRCPTR_SWAP (xp, yp);            \
 634     MP_SIZE_T_SWAP (xs, ys);            \
 635   } while(0)
 636
 637 #define MPZ_PTR_SWAP(x, y)              \
 638   do {                                  \
 639     mpz_ptr __mpz_ptr_swap__tmp = (x);  \
 640     (x) = (y);                          \
 641     (y) = __mpz_ptr_swap__tmp;          \
 642   } while (0)
 643 #define MPZ_SRCPTR_SWAP(x, y)                   \
 644   do {                                          \
 645     mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
 646     (x) = (y);                                  \
 647     (y) = __mpz_srcptr_swap__tmp;               \
 648   } while (0)
 649
 650
 651 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
 652    but current gcc (3.0) doesn't seem to support that.  */
 653 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
 654 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
 655 __GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
 656
 657 void *__gmp_default_allocate __GMP_PROTO ((size_t));
 658 void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
 659 void __gmp_default_free __GMP_PROTO ((void *, size_t));
 660
 661 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
 662   ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
 663 #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
 664
 665 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
 666   ((type *) (*__gmp_reallocate_func)                            \
 667    (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
 668 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
 669   __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
 670
 671 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
 672 #define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
 673
 674 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
 675   do {                                                          \
 676     if ((oldsize) != (newsize))                                 \
 677       (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
 678   } while (0)
 679
 680 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
 681   do {                                                                  \
 682     if ((oldsize) != (newsize))                                         \
 683       (ptr) = (type *) (*__gmp_reallocate_func)                         \
 684         (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
 685   } while (0)
 686
 687
 688 /* Dummy for non-gcc, code involving it will go dead. */
 689 #if ! defined (__GNUC__) || __GNUC__ < 2
 690 #define __builtin_constant_p(x)   0
 691 #endif
 692
 693
 694 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
 695    stack usage is compatible.  __attribute__ ((regparm (N))) helps by
 696    putting leading parameters in registers, avoiding extra stack.
 697
 698    regparm cannot be used with calls going through the PLT, because the
 699    binding code there may clobber the registers (%eax, %edx, %ecx) used for
 700    the regparm parameters.  Calls to local (ie. static) functions could
 701    still use this, if we cared to differentiate locals and globals.
 702
 703    On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
 704    -p or -pg profiling, since that version of gcc doesn't realize the
 705    .mcount calls will clobber the parameter registers.  Other systems are
 706    ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
 707    bother to try to detect this.  regparm is only an optimization so we just
 708    disable it when profiling (profiling being a slowdown anyway).  */
 709
 710 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
 711   && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
 712 #define USE_LEADING_REGPARM 1
 713 #else
 714 #define USE_LEADING_REGPARM 0
 715 #endif
 716
 717 /* Macros for altering parameter order according to regparm usage. */
 718 #if USE_LEADING_REGPARM
 719 #define REGPARM_2_1(a,b,x)    x,a,b
 720 #define REGPARM_3_1(a,b,c,x)  x,a,b,c
 721 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
 722 #else
 723 #define REGPARM_2_1(a,b,x)    a,b,x
 724 #define REGPARM_3_1(a,b,c,x)  a,b,c,x
 725 #define REGPARM_ATTR(n)
 726 #endif
 727
 728
 729 /* ASM_L gives a local label for a gcc asm block, for use when temporary
 730    local labels like "1:" might not be available, which is the case for
 731    instance on the x86s (the SCO assembler doesn't support them).
 732
 733    The label generated is made unique by including "%=" which is a unique
 734    number for each insn.  This ensures the same name can be used in multiple
 735    asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
 736    allowed there's no need for a label to be usable outside a single
 737    block.  */
 738
 739 #define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
 740
 741
 742 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
 743 #if 0
 744 /* FIXME: Check that these actually improve things.
 745    FIXME: Need a cld after each std.
 746    FIXME: Can't have inputs in clobbered registers, must describe them as
 747    dummy outputs, and add volatile. */
 748 #define MPN_COPY_INCR(DST, SRC, N)                                      \
 749   __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
 750            "D" (DST), "S" (SRC), "c" (N) :                              \
 751            "cx", "di", "si", "memory")
 752 #define MPN_COPY_DECR(DST, SRC, N)                                      \
 753   __asm__ ("std\n\trep\n\tmovsl" : :                                    \
 754            "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
 755            "cx", "di", "si", "memory")
 756 #endif
 757 #endif
 758
 759
 760 void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
 761 #define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
 762
 763 #define mpz_n_pow_ui __gmpz_n_pow_ui
 764 void    mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
 765
 766
 767 #define mpn_addmul_1c __MPN(addmul_1c)
 768 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 769
 770 #define mpn_addmul_2 __MPN(addmul_2)
 771 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 772
 773 #define mpn_addmul_3 __MPN(addmul_3)
 774 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 775
 776 #define mpn_addmul_4 __MPN(addmul_4)
 777 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 778
 779 #define mpn_addmul_5 __MPN(addmul_5)
 780 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 781
 782 #define mpn_addmul_6 __MPN(addmul_6)
 783 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 784
 785 #define mpn_addmul_7 __MPN(addmul_7)
 786 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 787
 788 #define mpn_addmul_8 __MPN(addmul_8)
 789 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 790
 791 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
 792    returns the carry out (0, 1 or 2).  */
 793 #define mpn_addlsh1_n __MPN(addlsh1_n)
 794 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 795
 796 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
 797    returns the borrow out (0, 1 or 2).  */
 798 #define mpn_sublsh1_n __MPN(sublsh1_n)
 799 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 800
 801 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
 802    and returns the bit rshifted out (0 or 1).  */
 803 #define mpn_rsh1add_n __MPN(rsh1add_n)
 804 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 805
 806 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
 807    and returns the bit rshifted out (0 or 1).  If there's a borrow from the
 808    subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
 809    complement negative.  */
 810 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
 811 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 812
 813 #define mpn_lshiftc __MPN(lshiftc)
 814 __GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
 815
 816 #define mpn_addsub_n __MPN(addsub_n)
 817 __GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 818
 819 #define mpn_addsub_nc __MPN(addsub_nc)
 820 __GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 821
 822 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
 823 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 824
 825 #define mpn_divrem_1c __MPN(divrem_1c)
 826 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 827
 828 #define mpn_dump __MPN(dump)
 829 __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
 830
 831 #define mpn_fib2_ui __MPN(fib2_ui)
 832 mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
 833
 834 /* Remap names of internal mpn functions.  */
 835 #define __clz_tab               __MPN(clz_tab)
 836 #define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)
 837
 838 #define mpn_jacobi_base __MPN(jacobi_base)
 839 int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
 840
 841 #define mpn_mod_1c __MPN(mod_1c)
 842 __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
 843
 844 #define mpn_mul_1c __MPN(mul_1c)
 845 __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 846
 847 #define mpn_mul_2 __MPN(mul_2)
 848 mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 849
 850 #define mpn_mul_3 __MPN(mul_3)
 851 __GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 852
 853 #define mpn_mul_4 __MPN(mul_4)
 854 __GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 855
 856 #ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
 857 #define mpn_mul_basecase __MPN(mul_basecase)
 858 __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
 859 #endif
 860
 861 #define mpn_mullow_n __MPN(mullow_n)
 862 __GMP_DECLSPEC void mpn_mullow_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 863
 864 #define mpn_mullow_basecase __MPN(mullow_basecase)
 865 __GMP_DECLSPEC void mpn_mullow_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 866
 867 #define mpn_sqr_n __MPN(sqr_n)
 868 __GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
 869
 870 #ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
 871 #define mpn_sqr_basecase __MPN(sqr_basecase)
 872 __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
 873 #endif
 874
 875 #define mpn_submul_1c __MPN(submul_1c)
 876 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 877
 878 #define mpn_invert_2exp __MPN(invert_2exp)
 879 __GMP_DECLSPEC void mpn_invert_2exp __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 880
 881 #define mpn_redc_1 __MPN(redc_1)
 882 __GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);)
 883
 884 #define mpn_redc_2 __MPN(redc_2)
 885 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 886
 887
 888 #define mpn_mod_1s_1p_cps __MPN(mod_1s_1p_cps)
 889 __GMP_DECLSPEC void mpn_mod_1s_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
 890 #define mpn_mod_1s_1p __MPN(mod_1s_1p)
 891 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]));
 892
 893 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
 894 __GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
 895 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
 896 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]));
 897
 898 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
 899 __GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
 900 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
 901 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]));
 902
 903 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
 904 __GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
 905 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
 906 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]));
 907
 908
 909 typedef __gmp_randstate_struct *gmp_randstate_ptr;
 910 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
 911
 912 /* Pseudo-random number generator function pointers structure.  */
 913 typedef struct {
 914   void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
 915   void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
 916   void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
 917   void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
 918 } gmp_randfnptr_t;
 919
 920 /* Macro to obtain a void pointer to the function pointers structure.  */
 921 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
 922
 923 /* Macro to obtain a pointer to the generator's state.
 924    When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
 925 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
 926
 927 /* Write a given number of random bits to rp.  */
 928 #define _gmp_rand(rp, state, bits)                              \
 929   do {                                                          \
 930     gmp_randstate_ptr  __rstate = (state);                      \
 931     (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
 932        (__rstate, rp, bits);                                    \
 933   } while (0)
 934
 935 __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
 936
 937
 938 /* __gmp_rands is the global state for the old-style random functions, and
 939    is also used in the test programs (hence the __GMP_DECLSPEC).
 940
 941    There's no seeding here, so mpz_random etc will generate the same
 942    sequence every time.  This is not unlike the C library random functions
 943    if you don't seed them, so perhaps it's acceptable.  Digging up a seed
 944    from /dev/random or the like would work on many systems, but might
 945    encourage a false confidence, since it'd be pretty much impossible to do
 946    something that would work reliably everywhere.  In any case the new style
 947    functions are recommended to applications which care about randomness, so
 948    the old functions aren't too important.  */
 949
 950 __GMP_DECLSPEC extern char             __gmp_rands_initialized;
 951 __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
 952
 953 #define RANDS                                       \
 954   ((__gmp_rands_initialized ? 0                     \
 955     : (__gmp_rands_initialized = 1,                 \
 956        __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
 957    __gmp_rands)
 958
 959 /* this is used by the test programs, to free memory */
 960 #define RANDS_CLEAR()                   \
 961   do {                                  \
 962     if (__gmp_rands_initialized)        \
 963       {                                 \
 964         __gmp_rands_initialized = 0;    \
 965         gmp_randclear (__gmp_rands);    \
 966       }                                 \
 967   } while (0)
 968
 969
 970 /* FIXME: Make these itch functions less conservative.  Also consider making
 971    them dependent on just 'an', and compute the allocation directly from 'an'
 972    instead of via n.  */
 973 static inline mp_size_t
 974 mpn_toom22_mul_itch (mp_size_t an, mp_size_t bn)
 975 {
 976   mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
 977   return 4 * n + 2;
 978 }
 979
 980 static inline mp_size_t
 981 mpn_toom33_mul_itch (mp_size_t an, mp_size_t bn)
 982 {
 983   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
 984      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
 985   mp_size_t n = (an + 2) / (size_t) 3;
 986   return 6 * n + GMP_NUMB_BITS;
 987 }
 988
 989 static inline mp_size_t
 990 mpn_toom44_mul_itch (mp_size_t an, mp_size_t bn)
 991 {
 992   mp_size_t n = (an + 3) >> 2;
 993   return 12 * n + GMP_NUMB_BITS;
 994 }
 995
 996 static inline mp_size_t
 997 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
 998 {
 999   mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
1000   return 4 * n + 2;
1001 }
1002
1003 static inline mp_size_t
1004 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
1005 {
1006   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
1007      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
1008   mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
1009   return 6 * n + 3;
1010 }
1011
1012 static inline mp_size_t
1013 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
1014 {
1015   mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
1016   return 10 * n + 10;
1017 }
1018
1019 static inline mp_size_t
1020 mpn_toom2_sqr_itch (mp_size_t an)
1021 {
1022   mp_size_t n = 1 + ((an - 1) >> 1);
1023   return 4 * n + 2;
1024 }
1025
1026 static inline mp_size_t
1027 mpn_toom3_sqr_itch (mp_size_t an)
1028 {
1029   /* We could trim this to 4n+3 if HAVE_NATIVE_mpn_sublsh1_n, since
1030      mpn_toom_interpolate_5pts only needs scratch otherwise.  */
1031   mp_size_t n = (an + 2) / (size_t) 3;
1032   return 6 * n + GMP_NUMB_BITS;
1033 }
1034
1035 static inline mp_size_t
1036 mpn_toom4_sqr_itch (mp_size_t an)
1037 {
1038   mp_size_t n = (an + 3) >> 2;
1039   return 12 * n + GMP_NUMB_BITS;
1040 }
1041
1042
1043 /* kara uses n+1 limbs of temporary space and then recurses with the balance,
1044    so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  This can be solved to
1045    2n + o(n).  Since n is very limited, o(n) in practice could be around 15.
1046    For now, assume n is arbitrarily large.  */
1047 #define MPN_KARA_MUL_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
1048 #define MPN_KARA_SQR_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
1049
1050 /* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
1051    unavailable, but just 2n + o(n) if mpn_sublsh1_n is available.  It is hard
1052    to pin down the value of o(n), since it is a complex function of
1053    MUL_TOOM3_THRESHOLD and n.  Normally toom3 is used between kara and fft; in
1054    that case o(n) will be really limited.  If toom3 is used for arbitrarily
1055    large operands, o(n) will be larger.  These definitions handle operands of
1056    up to 8956264246117233 limbs.  A single multiplication using toom3 on the
1057    fastest hardware currently (2008) would need 10 million years, which
1058    suggests that these limits are acceptable.  */
1059 #if WANT_FFT
1060 #if HAVE_NATIVE_mpn_sublsh1_n
1061 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 63)
1062 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 63)
1063 #else
1064 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
1065 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
1066 #endif
1067 #else /* WANT_FFT */
1068 #if HAVE_NATIVE_mpn_sublsh1_n
1069 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 255)
1070 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 255)
1071 #else
1072 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
1073 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
1074 #endif
1075 #define MPN_TOOM44_MAX_N 285405
1076 #endif /* WANT_FFT */
1077
1078 /* need 2 so that n2>=1 */
1079 #define MPN_KARA_MUL_N_MINSIZE    2
1080 #define MPN_KARA_SQR_N_MINSIZE    2
1081
1082 /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
1083 #define MPN_TOOM3_MUL_N_MINSIZE   17
1084 #define MPN_TOOM3_SQR_N_MINSIZE   17
1085
1086 #define MPN_TOOM44_MUL_N_MINSIZE  30    /* ??? */
1087 #define MPN_TOOM4_SQR_N_MINSIZE   30    /* ??? */
1088
1089 #define   mpn_sqr_diagonal __MPN(sqr_diagonal)
1090 void      mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1091
1092 #define   mpn_kara_mul_n __MPN(kara_mul_n)
1093 void      mpn_kara_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
1094
1095 #define   mpn_kara_sqr_n __MPN(kara_sqr_n)
1096 void      mpn_kara_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1097
1098 #define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1099 void      mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t, mp_ptr));
1100
1101 enum toom4_flags { toom4_w1_neg = 1, toom4_w3_neg = 2 }; /* FIXME */
1102 #define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1103 void      mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom4_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
1104
1105 #define   mpn_toom3_mul_n __MPN(toom3_mul_n)
1106 void      mpn_toom3_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
1107
1108 #define   mpn_toom3_sqr_n __MPN(toom3_sqr_n)
1109 void      mpn_toom3_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1110
1111 #define   mpn_toom22_mul __MPN(toom22_mul)
1112 void      mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1113
1114 #define   mpn_toom2_sqr __MPN(toom2_sqr)
1115 void      mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1116
1117 #define   mpn_toom33_mul __MPN(toom33_mul)
1118 void      mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1119
1120 #define   mpn_toom3_sqr __MPN(toom3_sqr)
1121 void      mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1122
1123 #define   mpn_toom44_mul __MPN(toom44_mul)
1124 void      mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1125
1126 #define   mpn_toom32_mul __MPN(toom32_mul)
1127 void      mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1128
1129 #define   mpn_toom42_mul __MPN(toom42_mul)
1130 void      mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1131
1132 #define   mpn_toom53_mul __MPN(toom53_mul)
1133 void      mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1134
1135 #define   mpn_toom62_mul __MPN(toom62_mul)
1136 void      mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1137
1138 #define   mpn_toom4_sqr __MPN(toom4_sqr)
1139 void      mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1140
1141 #define   mpn_fft_best_k __MPN(fft_best_k)
1142 int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1143
1144 #define   mpn_mul_fft __MPN(mul_fft)
1145 void      mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
1146
1147 #define   mpn_mul_fft_full __MPN(mul_fft_full)
1148 void      mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1149
1150 #define   mpn_fft_next_size __MPN(fft_next_size)
1151 mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1152
1153 #define   mpn_sb_divrem_mn __MPN(sb_divrem_mn)
1154 mp_limb_t mpn_sb_divrem_mn __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1155
1156 #define   mpn_dc_divrem_n __MPN(dc_divrem_n)
1157 mp_limb_t mpn_dc_divrem_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
1158
1159 #define   mpn_sb_div_qr __MPN(sb_div_qr)
1160 mp_limb_t mpn_sb_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1161 #define   mpn_sb_div_q __MPN(sb_div_q)
1162 mp_limb_t mpn_sb_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1163 #define   mpn_sb_divappr_q __MPN(sb_divappr_q)
1164 mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1165 #define   mpn_dc_div_qr __MPN(dc_div_qr)
1166 mp_limb_t mpn_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1167 #define   mpn_dc_div_qr_n __MPN(dc_div_qr_n)
1168 mp_limb_t mpn_dc_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1169 #define   mpn_dc_div_q __MPN(dc_div_q)
1170 mp_limb_t mpn_dc_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1171 #define   mpn_preinv_dc_div_qr __MPN(preinv_dc_div_qr)
1172 mp_limb_t mpn_preinv_dc_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1173 #define   mpn_dc_divappr_q __MPN(dc_divappr_q)
1174 mp_limb_t mpn_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
1175 #define   mpn_dc_divappr_q_n __MPN(dc_divappr_q_n)
1176 mp_limb_t mpn_dc_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_ptr));
1177 #define   mpn_preinv_dc_divappr_q __MPN(preinv_dc_divappr_q)
1178 mp_limb_t mpn_preinv_dc_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr));
1179 #define   mpn_mu_div_qr __MPN(mu_div_qr)
1180 mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1181 #define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1182 mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1183 #define   mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
1184 mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1185 #define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1186 void      mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1187 #define   mpn_mu_divappr_q __MPN(mu_divappr_q)
1188 mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1189 #define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1190 mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1191 #define   mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
1192 mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1193 #define   mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
1194 void      mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1195 #define   mpn_mu_div_q __MPN(mu_div_q)
1196 mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1197 #define   mpn_invert __MPN(invert)
1198 void      mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1199 #define   mpn_invert_itch __MPN(invert_itch)
1200 mp_size_t mpn_invert_itch __GMP_PROTO ((mp_size_t));
1201
1202 #define   mpn_binvert __MPN(binvert)
1203 void      mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1204 #define   mpn_binvert_itch __MPN(binvert_itch)
1205 mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
1206 #define   mpn_sb_bdiv_qr __MPN(sb_bdiv_qr)
1207 mp_limb_t mpn_sb_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1208 #define   mpn_sb_bdiv_q __MPN(sb_bdiv_q)
1209 void      mpn_sb_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1210 #define   mpn_dc_bdiv_qr __MPN(dc_bdiv_qr)
1211 mp_limb_t mpn_dc_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1212 #define   mpn_dc_bdiv_qr_n_itch __MPN(dc_bdiv_qr_n_itch)
1213 mp_size_t mpn_dc_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
1214 #define   mpn_dc_bdiv_qr_n __MPN(dc_bdiv_qr_n)
1215 mp_limb_t mpn_dc_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1216 #define   mpn_dc_bdiv_q __MPN(dc_bdiv_q)
1217 void      mpn_dc_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1218 #define   mpn_dc_bdiv_q_n_itch __MPN(dc_bdiv_q_n_itch)
1219 mp_size_t mpn_dc_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
1220 #define   mpn_dc_bdiv_q_n __MPN(dc_bdiv_q_n)
1221 void      mpn_dc_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1222 #define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1223 void      mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1224 #define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1225 mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1226 #define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1227 void      mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1228 #define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1229 mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1230
1231 #define   mpn_divexact __MPN(divexact)
1232 void      mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1233 #define   mpn_divexact_itch __MPN(divexact_itch)
1234 mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1235
1236
1237 #define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1238 mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
1239 #define   mpn_bdiv_dbm1(dst, src, size, divisor) \
1240   mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1241
1242 #define   mpn_powm __MPN(powm)
1243 void      mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1244 #define   mpn_powlo __MPN(powlo)
1245 void      mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1246
1247 #define   mpn_powm_sec __MPN(powm_sec)
1248 void      mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1249 #define   mpn_subcnd_n __MPN(subcnd_n)
1250 mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
1251 #define   mpn_tabselect __MPN(tabselect)
1252 void      mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
1253
1254 #ifndef DIVEXACT_BY3_METHOD
1255 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1256 #define DIVEXACT_BY3_METHOD 0   /* default to using mpn_bdiv_dbm1c */
1257 #else
1258 #define DIVEXACT_BY3_METHOD 1
1259 #endif
1260 #endif
1261
1262 #if DIVEXACT_BY3_METHOD == 0
1263 #undef mpn_divexact_by3
1264 #define mpn_divexact_by3(dst,src,size) \
1265   (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1266 /* override mpn_divexact_by3c defined in gmp.h */
1267 /*
1268 #undef mpn_divexact_by3c
1269 #define mpn_divexact_by3c(dst,src,size,cy) \
1270   (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
1271 */
1272 #endif
1273
1274 #if GMP_NUMB_BITS % 4 == 0
1275 #define mpn_divexact_by5(dst,src,size) \
1276   (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1277 #endif
1278
1279 #if GMP_NUMB_BITS % 6 == 0
1280 #define mpn_divexact_by7(dst,src,size) \
1281   (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1282 #endif
1283
1284 #if GMP_NUMB_BITS % 6 == 0
1285 #define mpn_divexact_by9(dst,src,size) \
1286   (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1287 #endif
1288
1289 #if GMP_NUMB_BITS % 10 == 0
1290 #define mpn_divexact_by11(dst,src,size) \
1291   (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1292 #endif
1293
1294 #if GMP_NUMB_BITS % 12 == 0
1295 #define mpn_divexact_by13(dst,src,size) \
1296   (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1297 #endif
1298
1299 #if GMP_NUMB_BITS % 4 == 0
1300 #define mpn_divexact_by15(dst,src,size) \
1301   (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1302 #endif
1303
1304 #define mpz_divexact_gcd  __gmpz_divexact_gcd
1305 void    mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
1306
1307 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1308 #ifdef _GMP_H_HAVE_FILE
1309 size_t  mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
1310 #endif
1311
1312 #define mpn_divisible_p __MPN(divisible_p)
1313 int     mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
1314
1315 #define   mpn_rootrem __MPN(rootrem)
1316 mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1317
1318
1319 #if defined (_CRAY)
1320 #define MPN_COPY_INCR(dst, src, n)                                      \
1321   do {                                                                  \
1322     int __i;            /* Faster on some Crays with plain int */       \
1323     _Pragma ("_CRI ivdep");                                             \
1324     for (__i = 0; __i < (n); __i++)                                     \
1325       (dst)[__i] = (src)[__i];                                          \
1326   } while (0)
1327 #endif
1328
1329 /* used by test programs, hence __GMP_DECLSPEC */
1330 #ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
1331 #define mpn_copyi __MPN(copyi)
1332 __GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1333 #endif
1334
1335 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1336 #define MPN_COPY_INCR(dst, src, size)                   \
1337   do {                                                  \
1338     ASSERT ((size) >= 0);                               \
1339     ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
1340     mpn_copyi (dst, src, size);                         \
1341   } while (0)
1342 #endif
1343
1344 /* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
1345 #if ! defined (MPN_COPY_INCR)
1346 #define MPN_COPY_INCR(dst, src, n)                      \
1347   do {                                                  \
1348     ASSERT ((n) >= 0);                                  \
1349     ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
1350     if ((n) != 0)                                       \
1351       {                                                 \
1352         mp_size_t __n = (n) - 1;                        \
1353         mp_ptr __dst = (dst);                           \
1354         mp_srcptr __src = (src);                        \
1355         mp_limb_t __x;                                  \
1356         __x = *__src++;                                 \
1357         if (__n != 0)                                   \
1358           {                                             \
1359             do                                          \
1360               {                                         \
1361                 *__dst++ = __x;                         \
1362                 __x = *__src++;                         \
1363               }                                         \
1364             while (--__n);                              \
1365           }                                             \
1366         *__dst++ = __x;                                 \
1367       }                                                 \
1368   } while (0)
1369 #endif
1370
1371
1372 #if defined (_CRAY)
1373 #define MPN_COPY_DECR(dst, src, n)                                      \
1374   do {                                                                  \
1375     int __i;            /* Faster on some Crays with plain int */       \
1376     _Pragma ("_CRI ivdep");                                             \
1377     for (__i = (n) - 1; __i >= 0; __i--)                                \
1378       (dst)[__i] = (src)[__i];                                          \
1379   } while (0)
1380 #endif
1381
1382 /* used by test programs, hence __GMP_DECLSPEC */
1383 #ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
1384 #define mpn_copyd __MPN(copyd)
1385 __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1386 #endif
1387
1388 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1389 #define MPN_COPY_DECR(dst, src, size)                   \
1390   do {                                                  \
1391     ASSERT ((size) >= 0);                               \
1392     ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
1393     mpn_copyd (dst, src, size);                         \
1394   } while (0)
1395 #endif
1396
1397 /* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
1398 #if ! defined (MPN_COPY_DECR)
1399 #define MPN_COPY_DECR(dst, src, n)                      \
1400   do {                                                  \
1401     ASSERT ((n) >= 0);                                  \
1402     ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
1403     if ((n) != 0)                                       \
1404       {                                                 \
1405         mp_size_t __n = (n) - 1;                        \
1406         mp_ptr __dst = (dst) + __n;                     \
1407         mp_srcptr __src = (src) + __n;                  \
1408         mp_limb_t __x;                                  \
1409         __x = *__src--;                                 \
1410         if (__n != 0)                                   \
1411           {                                             \
1412             do                                          \
1413               {                                         \
1414                 *__dst-- = __x;                         \
1415                 __x = *__src--;                         \
1416               }                                         \
1417             while (--__n);                              \
1418           }                                             \
1419         *__dst-- = __x;                                 \
1420       }                                                 \
1421   } while (0)
1422 #endif
1423
1424
1425 #ifndef MPN_COPY
1426 #define MPN_COPY(d,s,n)                         \
1427   do {                                          \
1428     ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
1429     MPN_COPY_INCR (d, s, n);                    \
1430   } while (0)
1431 #endif
1432
1433
1434 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1435 #define MPN_REVERSE(dst, src, size)                     \
1436   do {                                                  \
1437     mp_ptr     __dst = (dst);                           \
1438     mp_size_t  __size = (size);                         \
1439     mp_srcptr  __src = (src) + __size - 1;              \
1440     mp_size_t  __i;                                     \
1441     ASSERT ((size) >= 0);                               \
1442     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
1443     CRAY_Pragma ("_CRI ivdep");                         \
1444     for (__i = 0; __i < __size; __i++)                  \
1445       {                                                 \
1446         *__dst = *__src;                                \
1447         __dst++;                                        \
1448         __src--;                                        \
1449       }                                                 \
1450   } while (0)
1451
1452
1453 /* Zero n limbs at dst.
1454
1455    For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
1456    ppc630 for instance this is optimal since it can sustain only 1 store per
1457    cycle.
1458
1459    gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1460    "for" loop in the generic code below can become stu/bdnz.  The do/while
1461    here helps it get to that.  The same caveat about plain -mpowerpc64 mode
1462    applies here as to __GMPN_COPY_INCR in gmp.h.
1463
1464    xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1465    this loop too.
1466
1467    Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1468    at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
1469    trouble than it's worth to do the same, though perhaps a call to memset
1470    would be good when on a GNU system.  */
1471
1472 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1473 #define MPN_ZERO(dst, n)                        \
1474   do {                                          \
1475     ASSERT ((n) >= 0);                          \
1476     if ((n) != 0)                               \
1477       {                                         \
1478         mp_ptr __dst = (dst) - 1;               \
1479         mp_size_t __n = (n);                    \
1480         do                                      \
1481           *++__dst = 0;                         \
1482         while (--__n);                          \
1483       }                                         \
1484   } while (0)
1485 #endif
1486
1487 #ifndef MPN_ZERO
1488 #define MPN_ZERO(dst, n)                        \
1489   do {                                          \
1490     ASSERT ((n) >= 0);                          \
1491     if ((n) != 0)                               \
1492       {                                         \
1493         mp_ptr __dst = (dst);                   \
1494         mp_size_t __n = (n);                    \
1495         do                                      \
1496           *__dst++ = 0;                         \
1497         while (--__n);                          \
1498       }                                         \
1499   } while (0)
1500 #endif
1501
1502
1503 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1504    start up and would need to strip a lot of zeros before it'd be faster
1505    than a simple cmpl loop.  Here are some times in cycles for
1506    std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1507    low zeros).
1508
1509                 std   cld
1510            P5    18    16
1511            P6    46    38
1512            K6    36    13
1513            K7    21    20
1514 */
1515 #ifndef MPN_NORMALIZE
1516 #define MPN_NORMALIZE(DST, NLIMBS) \
1517   do {                                                                  \
1518     while ((NLIMBS) > 0)                                                \
1519       {                                                                 \
1520         if ((DST)[(NLIMBS) - 1] != 0)                                   \
1521           break;                                                        \
1522         (NLIMBS)--;                                                     \
1523       }                                                                 \
1524   } while (0)
1525 #endif
1526 #ifndef MPN_NORMALIZE_NOT_ZERO
1527 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
1528   do {                                          \
1529     ASSERT ((NLIMBS) >= 1);                     \
1530     while (1)                                   \
1531       {                                         \
1532         if ((DST)[(NLIMBS) - 1] != 0)           \
1533           break;                                \
1534         (NLIMBS)--;                             \
1535       }                                         \
1536   } while (0)
1537 #endif
1538
1539 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1540    and decrementing size.  low should be ptr[0], and will be the new ptr[0]
1541    on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
1542    somewhere a non-zero limb.  */
1543 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
1544   do {                                                  \
1545     ASSERT ((size) >= 1);                               \
1546     ASSERT ((low) == (ptr)[0]);                         \
1547                                                         \
1548     while ((low) == 0)                                  \
1549       {                                                 \
1550         (size)--;                                       \
1551         ASSERT ((size) >= 1);                           \
1552         (ptr)++;                                        \
1553         (low) = *(ptr);                                 \
1554       }                                                 \
1555   } while (0)
1556
1557 /* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
1558    temporary variable; it will be automatically cleared out at function
1559    return.  We use __x here to make it possible to accept both mpz_ptr and
1560    mpz_t arguments.  */
1561 #define MPZ_TMP_INIT(X, NLIMBS)                                         \
1562   do {                                                                  \
1563     mpz_ptr __x = (X);                                                  \
1564     ASSERT ((NLIMBS) >= 1);                                             \
1565     __x->_mp_alloc = (NLIMBS);                                          \
1566     __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
1567   } while (0)
1568
1569 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
1570 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
1571                           ? (mp_ptr) _mpz_realloc(z,n)  \
1572                           : PTR(z))
1573
1574 #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
1575
1576
1577 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1578    f1p.
1579
1580    From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1581    nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
1582    number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1583
1584    The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1585    without good floating point.  There's +2 for rounding up, and a further
1586    +2 since at the last step x limbs are doubled into a 2x+1 limb region
1587    whereas the actual F[2k] value might be only 2x-1 limbs.
1588
1589    Note that a division is done first, since on a 32-bit system it's at
1590    least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
1591    about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1592    whatever a multiply of two 190Mbyte numbers takes.)
1593
1594    Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1595    worked into the multiplier.  */
1596
1597 #define MPN_FIB2_SIZE(n) \
1598   ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
1599
1600
1601 /* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
1602    -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1603
1604    FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1605    F[n] + 2*F[n-1] fits in a limb.  */
1606
1607 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1608 #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
1609
1610
1611 /* For a threshold between algorithms A and B, size>=thresh is where B
1612    should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
1613    value 0 means only ever use B.  The tests for these special values will
1614    be compile-time constants, so the compiler should be able to eliminate
1615    the code for the unwanted algorithm.  */
1616
1617 #define ABOVE_THRESHOLD(size,thresh)    \
1618   ((thresh) == 0                        \
1619    || ((thresh) != MP_SIZE_T_MAX        \
1620        && (size) >= (thresh)))
1621 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
1622
1623 /* Usage: int  use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
1624           ...
1625           if (CACHED_BELOW_THRESHOLD (use_foo, size, FOO_THRESHOLD))
1626
1627    When "use_foo" is a constant (thresh is 0 or MP_SIZE_T), gcc prior to
1628    version 3.3 doesn't optimize away a test "if (use_foo)" when within a
1629    loop.  CACHED_BELOW_THRESHOLD helps it do so.  */
1630
1631 #define CACHED_ABOVE_THRESHOLD(cache, thresh)           \
1632   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1633    ? ABOVE_THRESHOLD (0, thresh)                        \
1634    : (cache))
1635 #define CACHED_BELOW_THRESHOLD(cache, thresh)           \
1636   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1637    ? BELOW_THRESHOLD (0, thresh)                        \
1638    : (cache))
1639
1640
1641 /* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
1642    value which is good on most machines.  */
1643 #ifndef MUL_KARATSUBA_THRESHOLD
1644 #define MUL_KARATSUBA_THRESHOLD 32
1645 #endif
1646
1647 /* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
1648    value which is good on most machines.  */
1649 #ifndef MUL_TOOM3_THRESHOLD
1650 #define MUL_TOOM3_THRESHOLD 128
1651 #endif
1652
1653 #ifndef MUL_TOOM44_THRESHOLD
1654 #define MUL_TOOM44_THRESHOLD 500
1655 #endif
1656
1657 /* Source compatibility while source is in flux.  */
1658 #define MUL_TOOM22_THRESHOLD MUL_KARATSUBA_THRESHOLD
1659 #define MUL_TOOM33_THRESHOLD MUL_TOOM3_THRESHOLD
1660 #define SQR_TOOM2_THRESHOLD SQR_KARATSUBA_THRESHOLD
1661
1662 /* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum for MUL_KARATSUBA_THRESHOLD.
1663    In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.
1664    In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a
1665    variable and a separate hard limit will have been defined.  Similarly for
1666    TOOM3.  */
1667 #ifndef MUL_KARATSUBA_THRESHOLD_LIMIT
1668 #define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD
1669 #endif
1670 #ifndef MUL_TOOM3_THRESHOLD_LIMIT
1671 #define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD
1672 #endif
1673 #ifndef MULLOW_BASECASE_THRESHOLD_LIMIT
1674 #define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD
1675 #endif
1676
1677 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1678    mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
1679    always.  (Note that we certainly always want it if there's a native
1680    assembler mpn_sqr_basecase.)
1681
1682    If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
1683    before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
1684    karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
1685    more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
1686    which mpn_sqr_basecase should be used, and that may be never.  */
1687
1688 #ifndef SQR_BASECASE_THRESHOLD
1689 #define SQR_BASECASE_THRESHOLD 0
1690 #endif
1691
1692 #ifndef SQR_KARATSUBA_THRESHOLD
1693 #define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
1694 #endif
1695
1696 #ifndef SQR_TOOM3_THRESHOLD
1697 #define SQR_TOOM3_THRESHOLD 128
1698 #endif
1699
1700 #ifndef SQR_TOOM4_THRESHOLD
1701 #define SQR_TOOM4_THRESHOLD 500
1702 #endif
1703
1704 /* See comments above about MUL_TOOM3_THRESHOLD_LIMIT.  */
1705 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1706 #define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
1707 #endif
1708
1709 #ifndef DC_DIV_QR_THRESHOLD
1710 #define DC_DIV_QR_THRESHOLD       43
1711 #endif
1712
1713 #ifndef DC_DIVAPPR_Q_THRESHOLD
1714 #define DC_DIVAPPR_Q_THRESHOLD   208
1715 #endif
1716
1717 #ifndef DC_DIV_Q_THRESHOLD
1718 #define DC_DIV_Q_THRESHOLD       228
1719 #endif
1720
1721 #ifndef DC_BDIV_QR_THRESHOLD
1722 #define DC_BDIV_QR_THRESHOLD      52
1723 #endif
1724
1725 #ifndef DC_BDIV_Q_THRESHOLD
1726 #define DC_BDIV_Q_THRESHOLD      224
1727 #endif
1728
1729 #ifndef DIVEXACT_JEB_THRESHOLD
1730 #define DIVEXACT_JEB_THRESHOLD    25
1731 #endif
1732
1733 #ifndef INV_NEWTON_THRESHOLD
1734 #define INV_NEWTON_THRESHOLD     654
1735 #endif
1736
1737 #ifndef BINV_NEWTON_THRESHOLD
1738 #define BINV_NEWTON_THRESHOLD    807
1739 #endif
1740
1741 #ifndef MU_DIVAPPR_Q_THRESHOLD
1742 #define MU_DIVAPPR_Q_THRESHOLD  4000
1743 #endif
1744
1745 #ifndef MU_DIV_Q_THRESHOLD
1746 #define MU_DIV_Q_THRESHOLD      4000
1747 #endif
1748
1749 #ifndef MU_BDIV_Q_THRESHOLD
1750 #define MU_BDIV_Q_THRESHOLD     2000
1751 #endif
1752
1753 /* First k to use for an FFT modF multiply.  A modF FFT is an order
1754    log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
1755    whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
1756 #define FFT_FIRST_K  4
1757
1758 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1759 #ifndef MUL_FFT_MODF_THRESHOLD
1760 #define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
1761 #endif
1762 #ifndef SQR_FFT_MODF_THRESHOLD
1763 #define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
1764 #endif
1765
1766 /* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
1767    will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1768    NxN->2N multiply and not recursing into itself is an order
1769    log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
1770    is the first better than toom3.  */
1771 #ifndef MUL_FFT_THRESHOLD
1772 #define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
1773 #endif
1774 #ifndef SQR_FFT_THRESHOLD
1775 #define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
1776 #endif
1777
1778 /* Table of thresholds for successive modF FFT "k"s.  The first entry is
1779    where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1780    etc.  See mpn_fft_best_k(). */
1781 #ifndef MUL_FFT_TABLE
1782 #define MUL_FFT_TABLE                           \
1783   { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1784     MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1785     MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1786     MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1787     MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1788     MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
1789     0 }
1790 #endif
1791 #ifndef SQR_FFT_TABLE
1792 #define SQR_FFT_TABLE                           \
1793   { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1794     SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1795     SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1796     SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1797     SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1798     SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
1799     0 }
1800 #endif
1801
1802 #ifndef FFT_TABLE_ATTRS
1803 #define FFT_TABLE_ATTRS   static const
1804 #endif
1805
1806 #define MPN_FFT_TABLE_SIZE  16
1807
1808
1809 /* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
1810    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
1811    i.e. n/2 >= MUL_KARATSUBA_THRESHOLD
1812
1813    Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
1814    for 3 as an average.  */
1815
1816 #ifndef DIV_DC_THRESHOLD
1817 #define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
1818 #endif
1819
1820 #ifndef GET_STR_DC_THRESHOLD
1821 #define GET_STR_DC_THRESHOLD             18
1822 #endif
1823
1824 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1825 #define GET_STR_PRECOMPUTE_THRESHOLD     35
1826 #endif
1827
1828 #ifndef SET_STR_DC_THRESHOLD
1829 #define SET_STR_DC_THRESHOLD            750
1830 #endif
1831
1832 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
1833 #define SET_STR_PRECOMPUTE_THRESHOLD   2000
1834 #endif
1835
1836 /* Return non-zero if xp,xsize and yp,ysize overlap.
1837    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1838    overlap.  If both these are false, there's an overlap. */
1839 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1840   ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1841 #define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
1842   (   (char *) (xp) + (xsize) > (char *) (yp)   \
1843    && (char *) (yp) + (ysize) > (char *) (xp))
1844
1845 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1846    overlapping.  Return zero if they're partially overlapping. */
1847 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
1848   MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1849 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \
1850   ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1851
1852 /* Return non-zero if dst,dsize and src,ssize are either identical or
1853    overlapping in a way suitable for an incrementing/decrementing algorithm.
1854    Return zero if they're partially overlapping in an unsuitable fashion. */
1855 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
1856   ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1857 #define MPN_SAME_OR_INCR_P(dst, src, size)      \
1858   MPN_SAME_OR_INCR2_P(dst, size, src, size)
1859 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
1860   ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1861 #define MPN_SAME_OR_DECR_P(dst, src, size)      \
1862   MPN_SAME_OR_DECR2_P(dst, size, src, size)
1863
1864
1865 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1866    ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1867    does it always.  Generally assertions are meant for development, but
1868    might help when looking for a problem later too.
1869
1870    Note that strings shouldn't be used within the ASSERT expression,
1871    eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1872    used in the !HAVE_STRINGIZE case (ie. K&R).  */
1873
1874 #ifdef __LINE__
1875 #define ASSERT_LINE  __LINE__
1876 #else
1877 #define ASSERT_LINE  -1
1878 #endif
1879
1880 #ifdef __FILE__
1881 #define ASSERT_FILE  __FILE__
1882 #else
1883 #define ASSERT_FILE  ""
1884 #endif
1885
1886 void __gmp_assert_header __GMP_PROTO ((const char *, int));
1887 __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
1888
1889 #if HAVE_STRINGIZE
1890 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1891 #else
1892 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
1893 #endif
1894
1895 #define ASSERT_ALWAYS(expr)     \
1896   do {                          \
1897     if (!(expr))                \
1898       ASSERT_FAIL (expr);       \
1899   } while (0)
1900
1901 #if WANT_ASSERT
1902 #define ASSERT(expr)   ASSERT_ALWAYS (expr)
1903 #else
1904 #define ASSERT(expr)   do {} while (0)
1905 #endif
1906
1907
1908 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
1909    that it's zero.  In both cases if assertion checking is disabled the
1910    expression is still evaluated.  These macros are meant for use with
1911    routines like mpn_add_n() where the return value represents a carry or
1912    whatever that should or shouldn't occur in some context.  For example,
1913    ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
1914 #if WANT_ASSERT
1915 #define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
1916 #define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
1917 #else
1918 #define ASSERT_CARRY(expr)     (expr)
1919 #define ASSERT_NOCARRY(expr)   (expr)
1920 #endif
1921
1922
1923 /* ASSERT_CODE includes code when assertion checking is wanted.  This is the
1924    same as writing "#if WANT_ASSERT", but more compact.  */
1925 #if WANT_ASSERT
1926 #define ASSERT_CODE(expr)  expr
1927 #else
1928 #define ASSERT_CODE(expr)
1929 #endif
1930
1931
1932 /* Test that an mpq_t is in fully canonical form.  This can be used as
1933    protection on routines like mpq_equal which give wrong results on
1934    non-canonical inputs.  */
1935 #if WANT_ASSERT
1936 #define ASSERT_MPQ_CANONICAL(q)                         \
1937   do {                                                  \
1938     ASSERT (q->_mp_den._mp_size > 0);                   \
1939     if (q->_mp_num._mp_size == 0)                       \
1940       {                                                 \
1941         /* zero should be 0/1 */                        \
1942         ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
1943       }                                                 \
1944     else                                                \
1945       {                                                 \
1946         /* no common factors */                         \
1947         mpz_t  __g;                                     \
1948         mpz_init (__g);                                 \
1949         mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
1950         ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
1951         mpz_clear (__g);                                \
1952       }                                                 \
1953   } while (0)
1954 #else
1955 #define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
1956 #endif
1957
1958 /* Check that the nail parts are zero. */
1959 #define ASSERT_ALWAYS_LIMB(limb)                \
1960   do {                                          \
1961     mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
1962     ASSERT_ALWAYS (__nail == 0);                \
1963   } while (0)
1964 #define ASSERT_ALWAYS_MPN(ptr, size)            \
1965   do {                                          \
1966     /* let whole loop go dead when no nails */  \
1967     if (GMP_NAIL_BITS != 0)                     \
1968       {                                         \
1969         mp_size_t  __i;                         \
1970         for (__i = 0; __i < (size); __i++)      \
1971           ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
1972       }                                         \
1973   } while (0)
1974 #if WANT_ASSERT
1975 #define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
1976 #define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
1977 #else
1978 #define ASSERT_LIMB(limb)       do {} while (0)
1979 #define ASSERT_MPN(ptr, size)   do {} while (0)
1980 #endif
1981
1982
1983 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
1984    size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
1985 #if WANT_ASSERT
1986 #define ASSERT_MPN_ZERO_P(ptr,size)     \
1987   do {                                  \
1988     mp_size_t  __i;                     \
1989     ASSERT ((size) >= 0);               \
1990     for (__i = 0; __i < (size); __i++)  \
1991       ASSERT ((ptr)[__i] == 0);         \
1992   } while (0)
1993 #define ASSERT_MPN_NONZERO_P(ptr,size)  \
1994   do {                                  \
1995     mp_size_t  __i;                     \
1996     int        __nonzero = 0;           \
1997     ASSERT ((size) >= 0);               \
1998     for (__i = 0; __i < (size); __i++)  \
1999       if ((ptr)[__i] != 0)              \
2000         {                               \
2001           __nonzero = 1;                \
2002           break;                        \
2003         }                               \
2004     ASSERT (__nonzero);                 \
2005   } while (0)
2006 #else
2007 #define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
2008 #define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
2009 #endif
2010
2011
2012 #if HAVE_NATIVE_mpn_com_n
2013 #define mpn_com_n __MPN(com_n)
2014 void    mpn_com_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
2015 #else
2016 #define mpn_com_n(d,s,n)                                \
2017   do {                                                  \
2018     mp_ptr     __d = (d);                               \
2019     mp_srcptr  __s = (s);                               \
2020     mp_size_t  __n = (n);                               \
2021     ASSERT (__n >= 1);                                  \
2022     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
2023     do                                                  \
2024       *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
2025     while (--__n);                                      \
2026   } while (0)
2027 #endif
2028
2029 #define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
2030   do {                                                  \
2031     mp_ptr       __d = (d);                             \
2032     mp_srcptr    __s1 = (s1);                           \
2033     mp_srcptr    __s2 = (s2);                           \
2034     mp_size_t    __n = (n);                             \
2035     ASSERT (__n >= 1);                                  \
2036     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
2037     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
2038     do                                                  \
2039       operation;                                        \
2040     while (--__n);                                      \
2041   } while (0)
2042
2043 #if HAVE_NATIVE_mpn_and_n
2044 #define mpn_and_n __MPN(and_n)
2045 void    mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2046 #else
2047 #define mpn_and_n(d, s1, s2, n) \
2048   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
2049 #endif
2050
2051 #if HAVE_NATIVE_mpn_andn_n
2052 #define mpn_andn_n __MPN(andn_n)
2053 void    mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2054 #else
2055 #define mpn_andn_n(d, s1, s2, n) \
2056   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
2057 #endif
2058
2059 #if HAVE_NATIVE_mpn_nand_n
2060 #define mpn_nand_n __MPN(nand_n)
2061 void    mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2062 #else
2063 #define mpn_nand_n(d, s1, s2, n) \
2064   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
2065 #endif
2066
2067 #if HAVE_NATIVE_mpn_ior_n
2068 #define mpn_ior_n __MPN(ior_n)
2069 void    mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2070 #else
2071 #define mpn_ior_n(d, s1, s2, n) \
2072   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
2073 #endif
2074
2075 #if HAVE_NATIVE_mpn_iorn_n
2076 #define mpn_iorn_n __MPN(iorn_n)
2077 void    mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2078 #else
2079 #define mpn_iorn_n(d, s1, s2, n) \
2080   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
2081 #endif
2082
2083 #if HAVE_NATIVE_mpn_nior_n
2084 #define mpn_nior_n __MPN(nior_n)
2085 void    mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2086 #else
2087 #define mpn_nior_n(d, s1, s2, n) \
2088   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
2089 #endif
2090
2091 #if HAVE_NATIVE_mpn_xor_n
2092 #define mpn_xor_n __MPN(xor_n)
2093 void    mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2094 #else
2095 #define mpn_xor_n(d, s1, s2, n) \
2096   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
2097 #endif
2098
2099 #if HAVE_NATIVE_mpn_xnor_n
2100 #define mpn_xnor_n __MPN(xnor_n)
2101 void    mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
2102 #else
2103 #define mpn_xnor_n(d, s1, s2, n) \
2104   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
2105 #endif
2106
2107
2108 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2109 #if GMP_NAIL_BITS == 0
2110 #define ADDC_LIMB(cout, w, x, y)        \
2111   do {                                  \
2112     mp_limb_t  __x = (x);               \
2113     mp_limb_t  __y = (y);               \
2114     mp_limb_t  __w = __x + __y;         \
2115     (w) = __w;                          \
2116     (cout) = __w < __x;                 \
2117   } while (0)
2118 #else
2119 #define ADDC_LIMB(cout, w, x, y)        \
2120   do {                                  \
2121     mp_limb_t  __w;                     \
2122     ASSERT_LIMB (x);                    \
2123     ASSERT_LIMB (y);                    \
2124     __w = (x) + (y);                    \
2125     (w) = __w & GMP_NUMB_MASK;          \
2126     (cout) = __w >> GMP_NUMB_BITS;      \
2127   } while (0)
2128 #endif
2129
2130 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2131    subtract.  */
2132 #if GMP_NAIL_BITS == 0
2133 #define SUBC_LIMB(cout, w, x, y)        \
2134   do {                                  \
2135     mp_limb_t  __x = (x);               \
2136     mp_limb_t  __y = (y);               \
2137     mp_limb_t  __w = __x - __y;         \
2138     (w) = __w;                          \
2139     (cout) = __w > __x;                 \
2140   } while (0)
2141 #else
2142 #define SUBC_LIMB(cout, w, x, y)        \
2143   do {                                  \
2144     mp_limb_t  __w = (x) - (y);         \
2145     (w) = __w & GMP_NUMB_MASK;          \
2146     (cout) = __w >> (GMP_LIMB_BITS-1);  \
2147   } while (0)
2148 #endif
2149
2150
2151 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2152    expecting no carry (or borrow) from that.
2153
2154    The size parameter is only for the benefit of assertion checking.  In a
2155    normal build it's unused and the carry/borrow is just propagated as far
2156    as it needs to go.
2157
2158    On random data, usually only one or two limbs of {ptr,size} get updated,
2159    so there's no need for any sophisticated looping, just something compact
2160    and sensible.
2161
2162    FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2163    declaring their operand sizes, then remove the former.  This is purely
2164    for the benefit of assertion checking.  */
2165
2166 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0      \
2167   && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
2168 /* Better flags handling than the generic C gives on i386, saving a few
2169    bytes of code and maybe a cycle or two.  */
2170
2171 #define MPN_IORD_U(ptr, incr, aors)                                     \
2172   do {                                                                  \
2173     mp_ptr  __ptr_dummy;                                                \
2174     if (__builtin_constant_p (incr) && (incr) == 1)                     \
2175       {                                                                 \
2176         __asm__ __volatile__                                            \
2177           ("\n" ASM_L(top) ":\n"                                        \
2178            "\t" aors " $1, (%0)\n"                                      \
2179            "\tleal 4(%0),%0\n"                                          \
2180            "\tjc " ASM_L(top)                                           \
2181            : "=r" (__ptr_dummy)                                         \
2182            : "0"  (ptr)                                                 \
2183            : "memory");                                                 \
2184       }                                                                 \
2185     else                                                                \
2186       {                                                                 \
2187         __asm__ __volatile__                                            \
2188           (   aors  " %2,(%0)\n"                                        \
2189            "\tjnc " ASM_L(done) "\n"                                    \
2190            ASM_L(top) ":\n"                                             \
2191            "\t" aors " $1,4(%0)\n"                                      \
2192            "\tleal 4(%0),%0\n"                                          \
2193            "\tjc " ASM_L(top) "\n"                                      \
2194            ASM_L(done) ":\n"                                            \
2195            : "=r" (__ptr_dummy)                                         \
2196            : "0"  (ptr),                                                \
2197              "ri" (incr)                                                \
2198            : "memory");                                                 \
2199       }                                                                 \
2200   } while (0)
2201
2202 #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
2203 #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
2204 #define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
2205 #define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
2206 #endif
2207
2208 #if GMP_NAIL_BITS == 0
2209 #ifndef mpn_incr_u
2210 #define mpn_incr_u(p,incr)                              \
2211   do {                                                  \
2212     mp_limb_t __x;                                      \
2213     mp_ptr __p = (p);                                   \
2214     if (__builtin_constant_p (incr) && (incr) == 1)     \
2215       {                                                 \
2216         while (++(*(__p++)) == 0)                       \
2217           ;                                             \
2218       }                                                 \
2219     else                                                \
2220       {                                                 \
2221         __x = *__p + (incr);                            \
2222         *__p = __x;                                     \
2223         if (__x < (incr))                               \
2224           while (++(*(++__p)) == 0)                     \
2225             ;                                           \
2226       }                                                 \
2227   } while (0)
2228 #endif
2229 #ifndef mpn_decr_u
2230 #define mpn_decr_u(p,incr)                              \
2231   do {                                                  \
2232     mp_limb_t __x;                                      \
2233     mp_ptr __p = (p);                                   \
2234     if (__builtin_constant_p (incr) && (incr) == 1)     \
2235       {                                                 \
2236         while ((*(__p++))-- == 0)                       \
2237           ;                                             \
2238       }                                                 \
2239     else                                                \
2240       {                                                 \
2241         __x = *__p;                                     \
2242         *__p = __x - (incr);                            \
2243         if (__x < (incr))                               \
2244           while ((*(++__p))-- == 0)                     \
2245             ;                                           \
2246       }                                                 \
2247   } while (0)
2248 #endif
2249 #endif
2250
2251 #if GMP_NAIL_BITS >= 1
2252 #ifndef mpn_incr_u
2253 #define mpn_incr_u(p,incr)                              \
2254   do {                                                  \
2255     mp_limb_t __x;                                      \
2256     mp_ptr __p = (p);                                   \
2257     if (__builtin_constant_p (incr) && (incr) == 1)     \
2258       {                                                 \
2259         do                                              \
2260           {                                             \
2261             __x = (*__p + 1) & GMP_NUMB_MASK;           \
2262             *__p++ = __x;                               \
2263           }                                             \
2264         while (__x == 0);                               \
2265       }                                                 \
2266     else                                                \
2267       {                                                 \
2268         __x = (*__p + (incr));                          \
2269         *__p++ = __x & GMP_NUMB_MASK;                   \
2270         if (__x >> GMP_NUMB_BITS != 0)                  \
2271           {                                             \
2272             do                                          \
2273               {                                         \
2274                 __x = (*__p + 1) & GMP_NUMB_MASK;       \
2275                 *__p++ = __x;                           \
2276               }                                         \
2277             while (__x == 0);                           \
2278           }                                             \
2279       }                                                 \
2280   } while (0)
2281 #endif
2282 #ifndef mpn_decr_u
2283 #define mpn_decr_u(p,incr)                              \
2284   do {                                                  \
2285     mp_limb_t __x;                                      \
2286     mp_ptr __p = (p);                                   \
2287     if (__builtin_constant_p (incr) && (incr) == 1)     \
2288       {                                                 \
2289         do                                              \
2290           {                                             \
2291             __x = *__p;                                 \
2292             *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
2293           }                                             \
2294         while (__x == 0);                               \
2295       }                                                 \
2296     else                                                \
2297       {                                                 \
2298         __x = *__p - (incr);                            \
2299         *__p++ = __x & GMP_NUMB_MASK;                   \
2300         if (__x >> GMP_NUMB_BITS != 0)                  \
2301           {                                             \
2302             do                                          \
2303               {                                         \
2304                 __x = *__p;                             \
2305                 *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
2306               }                                         \
2307             while (__x == 0);                           \
2308           }                                             \
2309       }                                                 \
2310   } while (0)
2311 #endif
2312 #endif
2313
2314 #ifndef MPN_INCR_U
2315 #if WANT_ASSERT
2316 #define MPN_INCR_U(ptr, size, n)                        \
2317   do {                                                  \
2318     ASSERT ((size) >= 1);                               \
2319     ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
2320   } while (0)
2321 #else
2322 #define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
2323 #endif
2324 #endif
2325
2326 #ifndef MPN_DECR_U
2327 #if WANT_ASSERT
2328 #define MPN_DECR_U(ptr, size, n)                        \
2329   do {                                                  \
2330     ASSERT ((size) >= 1);                               \
2331     ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
2332   } while (0)
2333 #else
2334 #define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
2335 #endif
2336 #endif
2337
2338
2339 /* Structure for conversion between internal binary format and
2340    strings in base 2..36.  */
2341 struct bases
2342 {
2343   /* Number of digits in the conversion base that always fits in an mp_limb_t.
2344      For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2345      is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
2346   int chars_per_limb;
2347
2348   /* log(2)/log(conversion_base) */
2349   double chars_per_bit_exactly;
2350
2351   /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2352      factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
2353      i.e. the number of bits used to represent each digit in the base.  */
2354   mp_limb_t big_base;
2355
2356   /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
2357      fixed-point number.  Instead of dividing by big_base an application can
2358      choose to multiply by big_base_inverted.  */
2359   mp_limb_t big_base_inverted;
2360 };
2361
2362 #define   mp_bases __MPN(bases)
2363 #define __mp_bases __MPN(bases)
2364 __GMP_DECLSPEC extern const struct bases mp_bases[257];
2365
2366
2367 /* For power of 2 bases this is exact.  For other bases the result is either
2368    exact or one too big.
2369
2370    To be exact always it'd be necessary to examine all the limbs of the
2371    operand, since numbers like 100..000 and 99...999 generally differ only
2372    in the lowest limb.  It'd be possible to examine just a couple of high
2373    limbs to increase the probability of being exact, but that doesn't seem
2374    worth bothering with.  */
2375
2376 #define MPN_SIZEINBASE(result, ptr, size, base)                         \
2377   do {                                                                  \
2378     int       __lb_base, __cnt;                                         \
2379     size_t __totbits;                                                   \
2380                                                                         \
2381     ASSERT ((size) >= 0);                                               \
2382     ASSERT ((base) >= 2);                                               \
2383     ASSERT ((base) < numberof (mp_bases));                              \
2384                                                                         \
2385     /* Special case for X == 0.  */                                     \
2386     if ((size) == 0)                                                    \
2387       (result) = 1;                                                     \
2388     else                                                                \
2389       {                                                                 \
2390         /* Calculate the total number of significant bits of X.  */     \
2391         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2392         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2393                                                                         \
2394         if (POW2_P (base))                                              \
2395           {                                                             \
2396             __lb_base = mp_bases[base].big_base;                        \
2397             (result) = (__totbits + __lb_base - 1) / __lb_base;         \
2398           }                                                             \
2399         else                                                            \
2400           (result) = (size_t)                                           \
2401             (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
2402       }                                                                 \
2403   } while (0)
2404
2405 /* eliminate mp_bases lookups for base==16 */
2406 #define MPN_SIZEINBASE_16(result, ptr, size)                            \
2407   do {                                                                  \
2408     int       __cnt;                                                    \
2409     mp_size_t __totbits;                                                \
2410                                                                         \
2411     ASSERT ((size) >= 0);                                               \
2412                                                                         \
2413     /* Special case for X == 0.  */                                     \
2414     if ((size) == 0)                                                    \
2415       (result) = 1;                                                     \
2416     else                                                                \
2417       {                                                                 \
2418         /* Calculate the total number of significant bits of X.  */     \
2419         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2420         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2421         (result) = (__totbits + 4 - 1) / 4;                             \
2422       }                                                                 \
2423   } while (0)
2424
2425 /* bit count to limb count, rounding up */
2426 #define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2427
2428 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
2429    mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
2430    in both cases (LIMBS_PER_ULONG is also defined here.) */
2431 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2432
2433 #define LIMBS_PER_ULONG 1
2434 #define MPN_SET_UI(zp, zn, u)   \
2435   (zp)[0] = (u);                \
2436   (zn) = ((zp)[0] != 0);
2437 #define MPZ_FAKE_UI(z, zp, u)   \
2438   (zp)[0] = (u);                \
2439   PTR (z) = (zp);               \
2440   SIZ (z) = ((zp)[0] != 0);     \
2441   ASSERT_CODE (ALLOC (z) = 1);
2442
2443 #else /* need two limbs per ulong */
2444
2445 #define LIMBS_PER_ULONG 2
2446 #define MPN_SET_UI(zp, zn, u)                          \
2447   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2448   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2449   (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2450 #define MPZ_FAKE_UI(z, zp, u)                          \
2451   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2452   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2453   SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2454   PTR (z) = (zp);                                      \
2455   ASSERT_CODE (ALLOC (z) = 2);
2456
2457 #endif
2458
2459
2460 #if HAVE_HOST_CPU_FAMILY_x86
2461 #define TARGET_REGISTER_STARVED 1
2462 #else
2463 #define TARGET_REGISTER_STARVED 0
2464 #endif
2465
2466
2467 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2468    or 0 there into a limb 0xFF..FF or 0 respectively.
2469
2470    On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2471    but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2472    a little compile-time test and a fallback to a "? :" form.  The latter is
2473    necessary for instance on Cray vector systems.
2474
2475    Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2476    to an arithmetic right shift anyway, but it's good to get the desired
2477    shift on past versions too (in particular since an important use of
2478    LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
2479
2480 #define LIMB_HIGHBIT_TO_MASK(n)                                 \
2481   (((mp_limb_signed_t) -1 >> 1) < 0                             \
2482    ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
2483    : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
2484
2485
2486 /* Use a library function for invert_limb, if available. */
2487 #define   mpn_invert_limb __MPN(invert_limb)
2488 mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2489 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2490 #define invert_limb(invxl,xl)           \
2491   do {                                  \
2492     (invxl) = mpn_invert_limb (xl);     \
2493   } while (0)
2494 #endif
2495
2496 #ifndef invert_limb
2497 #define invert_limb(invxl,xl)                   \
2498   do {                                          \
2499     mp_limb_t dummy;                            \
2500     ASSERT ((xl) != 0);                         \
2501     udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \
2502   } while (0)
2503 #endif
2504
2505 #ifndef udiv_qrnnd_preinv
2506 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3
2507 #endif
2508
2509 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2510    limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
2511    If this would yield overflow, DI should be the largest possible number
2512    (i.e., only ones).  For correct operation, the most significant bit of D
2513    has to be set.  Put the quotient in Q and the remainder in R.  */
2514 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                         \
2515   do {                                                                  \
2516     mp_limb_t _q, _ql, _r;                                              \
2517     mp_limb_t _xh, _xl;                                                 \
2518     ASSERT ((d) != 0);                                                  \
2519     umul_ppmm (_q, _ql, (nh), (di));                                    \
2520     _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
2521     umul_ppmm (_xh, _xl, _q, (d));                                      \
2522     sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
2523     if (_xh != 0)                                                       \
2524       {                                                                 \
2525         sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
2526         _q += 1;                                                        \
2527         if (_xh != 0)                                                   \
2528           {                                                             \
2529             _r -= (d);                                                  \
2530             _q += 1;                                                    \
2531           }                                                             \
2532       }                                                                 \
2533     if (_r >= (d))                                                      \
2534       {                                                                 \
2535         _r -= (d);                                                      \
2536         _q += 1;                                                        \
2537       }                                                                 \
2538     (r) = _r;                                                           \
2539     (q) = _q;                                                           \
2540   } while (0)
2541
2542 /* Like udiv_qrnnd_preinv, but branch-free. */
2543 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                         \
2544   do {                                                                  \
2545     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2546     mp_limb_t _xh, _xl;                                                 \
2547     _n2 = (nh);                                                         \
2548     _n10 = (nl);                                                        \
2549     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2550     _nadj = _n10 + (_nmask & (d));                                      \
2551     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2552     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2553     _q1 = ~_xh;                                                         \
2554     umul_ppmm (_xh, _xl, _q1, d);                                       \
2555     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2556     _xh -= (d);                                 /* xh = 0 or -1 */      \
2557     (r) = _xl + ((d) & _xh);                                            \
2558     (q) = _xh - _q1;                                                    \
2559   } while (0)
2560
2561 /* Like udiv_qrnnd_preinv2, but for for any value D.  DNORM is D shifted left
2562    so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
2563 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2564   do {                                                                  \
2565     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2566     mp_limb_t _xh, _xl;                                                 \
2567     _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
2568     _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
2569     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2570     _nadj = _n10 + (_nmask & (dnorm));                                  \
2571     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2572     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2573     _q1 = ~_xh;                                                         \
2574     umul_ppmm (_xh, _xl, _q1, d);                                       \
2575     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2576     _xh -= (d);                                                         \
2577     (r) = _xl + ((d) & _xh);                                            \
2578     (q) = _xh - _q1;                                                    \
2579   } while (0)
2580
2581 /* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
2582
2583    We write things strangely below, to help gcc.  A more straightforward
2584    version:
2585
2586    _r = (nl) - _qh * (d);
2587    _t = _r + (d);
2588    if (_r >= _ql)
2589      {
2590        _qh--;
2591        _r = _t;
2592      }
2593
2594    For one operation shorter critical path, one may want to use this form:
2595
2596    _p = _qh * (d)
2597    _s = (nl) + (d);
2598    _r = (nl) - _p;
2599    _t = _s - _p;
2600    if (_r >= _ql)
2601      {
2602        _qh--;
2603        _r = _t;
2604      }
2605 */
2606 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)                         \
2607   do {                                                                  \
2608     mp_limb_t _qh, _ql, _r;                                             \
2609     umul_ppmm (_qh, _ql, (nh), (di));                                   \
2610     if (__builtin_constant_p (nl) && (nl) == 0)                         \
2611       _qh += (nh) + 1;                                                  \
2612     else                                                                \
2613       add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                  \
2614     _r = (nl) - _qh * (d);                                              \
2615     if (_r > _ql)       /* both > and >= should be OK */                \
2616       {                                                                 \
2617         _r += (d);                                                      \
2618         _qh--;                                                          \
2619       }                                                                 \
2620     if (UNLIKELY (_r >= (d)))                                           \
2621       {                                                                 \
2622         _r -= (d);                                                      \
2623         _qh++;                                                          \
2624       }                                                                 \
2625     (r) = _r;                                                           \
2626     (q) = _qh;                                                          \
2627   } while (0)
2628
2629 /* Compute r = nh*B mod d, where di is the inverse of d.  */
2630 #define udiv_rnd_preinv(r, nh, d, di)                                   \
2631   do {                                                                  \
2632     mp_limb_t _qh, _ql, _r;                                             \
2633     umul_ppmm (_qh, _ql, (nh), (di));                                   \
2634     _qh += (nh) + 1;                                                    \
2635     _r = - _qh * (d);                                                   \
2636     if (_r > _ql)                                                       \
2637       _r += (d);                                                        \
2638     (r) = _r;                                                           \
2639   } while (0)
2640
2641 #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
2642 #define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
2643 mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2644 #endif
2645
2646
2647 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
2648    the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
2649    mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
2650    the few CISC chips where preinv is not good have defines saying so.  */
2651 #ifndef USE_PREINV_DIVREM_1
2652 #define USE_PREINV_DIVREM_1   1
2653 #endif
2654 #ifndef USE_PREINV_MOD_1
2655 #define USE_PREINV_MOD_1   1
2656 #endif
2657
2658 #if USE_PREINV_DIVREM_1
2659 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2660   mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2661 #else
2662 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2663   mpn_divrem_1 (qp, xsize, ap, size, d)
2664 #endif
2665
2666 #if USE_PREINV_MOD_1
2667 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2668   mpn_preinv_mod_1 (src, size, divisor, inverse)
2669 #else
2670 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2671   mpn_mod_1 (src, size, divisor)
2672 #endif
2673
2674
2675 #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
2676 #define   mpn_mod_34lsub1 __MPN(mod_34lsub1)
2677 mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2678 #endif
2679
2680
2681 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2682    plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
2683    mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
2684    modexact are faster at all sizes, so the defaults are 0.  Those CPUs
2685    where this is not right have a tuned threshold.  */
2686 #ifndef DIVEXACT_1_THRESHOLD
2687 #define DIVEXACT_1_THRESHOLD  0
2688 #endif
2689 #ifndef MODEXACT_1_ODD_THRESHOLD
2690 #define MODEXACT_1_ODD_THRESHOLD  0
2691 #endif
2692
2693 #ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
2694 #define mpn_divexact_1 __MPN(divexact_1)
2695 void    mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2696 #endif
2697
2698 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
2699   do {                                                                        \
2700     if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
2701       ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2702     else                                                                      \
2703       {                                                                       \
2704         ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
2705         mpn_divexact_1 (dst, src, size, divisor);                             \
2706       }                                                                       \
2707   } while (0)
2708
2709 #ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
2710 #define   mpn_modexact_1c_odd __MPN(modexact_1c_odd)
2711 mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2712 #endif
2713
2714 #if HAVE_NATIVE_mpn_modexact_1_odd
2715 #define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
2716 mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2717 #else
2718 #define mpn_modexact_1_odd(src,size,divisor) \
2719   mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2720 #endif
2721
2722 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)                     \
2723   (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)                     \
2724    ? mpn_modexact_1_odd (src, size, divisor)                            \
2725    : mpn_mod_1 (src, size, divisor))
2726
2727
2728 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
2729    2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2730    n must be odd (otherwise such an inverse doesn't exist).
2731
2732    This is not to be confused with invert_limb(), which is completely
2733    different.
2734
2735    The table lookup gives an inverse with the low 8 bits valid, and each
2736    multiply step doubles the number of bits.  See Jebelean "An algorithm for
2737    exact division" end of section 4 (reference in gmp.texi).
2738
2739    Possible enhancement: Could use UHWtype until the last step, if half-size
2740    multiplies are faster (might help under _LONG_LONG_LIMB).
2741
2742    Alternative: As noted in Granlund and Montgomery "Division by Invariant
2743    Integers using Multiplication" (reference in gmp.texi), n itself gives a
2744    3-bit inverse immediately, and could be used instead of a table lookup.
2745    A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2746    bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
2747
2748 #define binvert_limb_table  __gmp_binvert_limb_table
2749 __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
2750
2751 #define binvert_limb(inv,n)                                             \
2752   do {                                                                  \
2753     mp_limb_t  __n = (n);                                               \
2754     mp_limb_t  __inv;                                                   \
2755     ASSERT ((__n & 1) == 1);                                            \
2756                                                                         \
2757     __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */                \
2758     if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;   \
2759     if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;   \
2760     if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;   \
2761                                                                         \
2762     if (GMP_NUMB_BITS > 64)                                             \
2763       {                                                                 \
2764         int  __invbits = 64;                                            \
2765         do {                                                            \
2766           __inv = 2 * __inv - __inv * __inv * __n;                      \
2767           __invbits *= 2;                                               \
2768         } while (__invbits < GMP_NUMB_BITS);                            \
2769       }                                                                 \
2770                                                                         \
2771     ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                        \
2772     (inv) = __inv & GMP_NUMB_MASK;                                      \
2773   } while (0)
2774 #define modlimb_invert binvert_limb  /* backward compatibility */
2775
2776 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2777    Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2778    GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2779    we need to start from GMP_NUMB_MAX>>1. */
2780 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2781
2782 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2783    These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2784 #define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
2785 #define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2786
2787
2788 /* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
2789
2790    It's not clear whether this is the best way to do this calculation.
2791    Anything congruent to -a would be fine for the one limb congruence
2792    tests.  */
2793
2794 #define NEG_MOD(r, a, d)                                                \
2795   do {                                                                  \
2796     ASSERT ((d) != 0);                                                  \
2797     ASSERT_LIMB (a);                                                    \
2798     ASSERT_LIMB (d);                                                    \
2799                                                                         \
2800     if ((a) <= (d))                                                     \
2801       {                                                                 \
2802         /* small a is reasonably likely */                              \
2803         (r) = (d) - (a);                                                \
2804       }                                                                 \
2805     else                                                                \
2806       {                                                                 \
2807         unsigned   __twos;                                              \
2808         mp_limb_t  __dnorm;                                             \
2809         count_leading_zeros (__twos, d);                                \
2810         __twos -= GMP_NAIL_BITS;                                        \
2811         __dnorm = (d) << __twos;                                        \
2812         (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);             \
2813       }                                                                 \
2814                                                                         \
2815     ASSERT_LIMB (r);                                                    \
2816   } while (0)
2817
2818 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2819 #define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
2820
2821
2822 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2823    to 0 if there's an even number.  "n" should be an unsigned long and "p"
2824    an int.  */
2825
2826 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2827 #define ULONG_PARITY(p, n)                                              \
2828   do {                                                                  \
2829     int __p;                                                            \
2830     __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));                    \
2831     (p) = __p & 1;                                                      \
2832   } while (0)
2833 #endif
2834
2835 /* Cray intrinsic _popcnt. */
2836 #ifdef _CRAY
2837 #define ULONG_PARITY(p, n)      \
2838   do {                          \
2839     (p) = _popcnt (n) & 1;      \
2840   } while (0)
2841 #endif
2842
2843 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2844     && ! defined (NO_ASM) && defined (__ia64)
2845 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
2846    to a 64 bit unsigned long long for popcnt */
2847 #define ULONG_PARITY(p, n)                                              \
2848   do {                                                                  \
2849     unsigned long long  __n = (unsigned long) (n);                      \
2850     int  __p;                                                           \
2851     __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));                \
2852     (p) = __p & 1;                                                      \
2853   } while (0)
2854 #endif
2855
2856 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2857     && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
2858 #if __GMP_GNUC_PREREQ (3,1)
2859 #define __GMP_qm "=Qm"
2860 #define __GMP_q "=Q"
2861 #else
2862 #define __GMP_qm "=qm"
2863 #define __GMP_q "=q"
2864 #endif
2865 #define ULONG_PARITY(p, n)                                              \
2866   do {                                                                  \
2867     char           __p;                                                 \
2868     unsigned long  __n = (n);                                           \
2869     __n ^= (__n >> 16);                                                 \
2870     __asm__ ("xorb %h1, %b1\n\t"                                        \
2871              "setpo %0"                                                 \
2872          : __GMP_qm (__p), __GMP_q (__n)                                \
2873          : "1" (__n));                                                  \
2874     (p) = __p;                                                          \
2875   } while (0)
2876 #endif
2877
2878 #if ! defined (ULONG_PARITY)
2879 #define ULONG_PARITY(p, n)                                              \
2880   do {                                                                  \
2881     unsigned long  __n = (n);                                           \
2882     int  __p = 0;                                                       \
2883     do                                                                  \
2884       {                                                                 \
2885         __p ^= 0x96696996L >> (__n & 0x1F);                             \
2886         __n >>= 5;                                                      \
2887       }                                                                 \
2888     while (__n != 0);                                                   \
2889                                                                         \
2890     (p) = __p & 1;                                                      \
2891   } while (0)
2892 #endif
2893
2894
2895 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
2896    version 3.1 at least) doesn't seem to know how to generate rlwimi for
2897    anything other than bit-fields, so use "asm".  */
2898 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2899   && HAVE_HOST_CPU_FAMILY_powerpc && BITS_PER_MP_LIMB == 32
2900 #define BSWAP_LIMB(dst, src)                                            \
2901   do {                                                                  \
2902     mp_limb_t  __bswapl_src = (src);                                    \
2903     mp_limb_t  __tmp1 = __bswapl_src >> 24;             /* low byte */  \
2904     mp_limb_t  __tmp2 = __bswapl_src << 24;             /* high byte */ \
2905     __asm__ ("rlwimi %0, %2, 24, 16, 23"                /* 2nd low */   \
2906          : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));           \
2907     __asm__ ("rlwimi %0, %2,  8,  8, 15"                /* 3nd high */  \
2908          : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));           \
2909     (dst) = __tmp1 | __tmp2;                            /* whole */     \
2910   } while (0)
2911 #endif
2912
2913 /* bswap is available on i486 and up and is fast.  A combination rorw $8 /
2914    roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
2915    kernel with xchgb instead of rorw), but this is not done here, because
2916    i386 means generic x86 and mixing word and dword operations will cause
2917    partial register stalls on P6 chips.  */
2918 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2919   && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
2920   && BITS_PER_MP_LIMB == 32
2921 #define BSWAP_LIMB(dst, src)                                            \
2922   do {                                                                  \
2923     __asm__ ("bswap %0" : "=r" (dst) : "0" (src));                      \
2924   } while (0)
2925 #endif
2926
2927 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2928   && defined (__amd64__) && BITS_PER_MP_LIMB == 64
2929 #define BSWAP_LIMB(dst, src)                                            \
2930   do {                                                                  \
2931     __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));                     \
2932   } while (0)
2933 #endif
2934
2935 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2936     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
2937 #define BSWAP_LIMB(dst, src)                                            \
2938   do {                                                                  \
2939     __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));           \
2940   } while (0)
2941 #endif
2942
2943 /* As per glibc. */
2944 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2945   && HAVE_HOST_CPU_FAMILY_m68k && BITS_PER_MP_LIMB == 32
2946 #define BSWAP_LIMB(dst, src)                                            \
2947   do {                                                                  \
2948     mp_limb_t  __bswapl_src = (src);                                    \
2949     __asm__ ("ror%.w %#8, %0\n\t"                                       \
2950              "swap   %0\n\t"                                            \
2951              "ror%.w %#8, %0"                                           \
2952              : "=d" (dst)                                               \
2953              : "0" (__bswapl_src));                                     \
2954   } while (0)
2955 #endif
2956
2957 #if ! defined (BSWAP_LIMB)
2958 #if BITS_PER_MP_LIMB == 8
2959 #define BSWAP_LIMB(dst, src)            \
2960   do { (dst) = (src); } while (0)
2961 #endif
2962 #if BITS_PER_MP_LIMB == 16
2963 #define BSWAP_LIMB(dst, src)                    \
2964   do {                                          \
2965     (dst) = ((src) << 8) + ((src) >> 8);        \
2966   } while (0)
2967 #endif
2968 #if BITS_PER_MP_LIMB == 32
2969 #define BSWAP_LIMB(dst, src)    \
2970   do {                          \
2971     (dst) =                     \
2972       ((src) << 24)             \
2973       + (((src) & 0xFF00) << 8) \
2974       + (((src) >> 8) & 0xFF00) \
2975       + ((src) >> 24);          \
2976   } while (0)
2977 #endif
2978 #if BITS_PER_MP_LIMB == 64
2979 #define BSWAP_LIMB(dst, src)            \
2980   do {                                  \
2981     (dst) =                             \
2982       ((src) << 56)                     \
2983       + (((src) & 0xFF00) << 40)        \
2984       + (((src) & 0xFF0000) << 24)      \
2985       + (((src) & 0xFF000000) << 8)     \
2986       + (((src) >> 8) & 0xFF000000)     \
2987       + (((src) >> 24) & 0xFF0000)      \
2988       + (((src) >> 40) & 0xFF00)        \
2989       + ((src) >> 56);                  \
2990   } while (0)
2991 #endif
2992 #endif
2993
2994 #if ! defined (BSWAP_LIMB)
2995 #define BSWAP_LIMB(dst, src)                            \
2996   do {                                                  \
2997     mp_limb_t  __bswapl_src = (src);                    \
2998     mp_limb_t  __dst = 0;                               \
2999     int        __i;                                     \
3000     for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
3001       {                                                 \
3002         __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
3003         __bswapl_src >>= 8;                             \
3004       }                                                 \
3005     (dst) = __dst;                                      \
3006   } while (0)
3007 #endif
3008
3009
3010 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3011    those we know are fast.  */
3012 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
3013   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
3014   && (HAVE_HOST_CPU_powerpc604                                          \
3015       || HAVE_HOST_CPU_powerpc604e                                      \
3016       || HAVE_HOST_CPU_powerpc750                                       \
3017       || HAVE_HOST_CPU_powerpc7400)
3018 #define BSWAP_LIMB_FETCH(limb, src)                                     \
3019   do {                                                                  \
3020     mp_srcptr  __blf_src = (src);                                       \
3021     mp_limb_t  __limb;                                                  \
3022     __asm__ ("lwbrx %0, 0, %1"                                          \
3023              : "=r" (__limb)                                            \
3024              : "r" (__blf_src),                                         \
3025                "m" (*__blf_src));                                       \
3026     (limb) = __limb;                                                    \
3027   } while (0)
3028 #endif
3029
3030 #if ! defined (BSWAP_LIMB_FETCH)
3031 #define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
3032 #endif
3033
3034
3035 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3036    know are fast.  FIXME: Is this necessary?  */
3037 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
3038   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
3039   && (HAVE_HOST_CPU_powerpc604                                          \
3040       || HAVE_HOST_CPU_powerpc604e                                      \
3041       || HAVE_HOST_CPU_powerpc750                                       \
3042       || HAVE_HOST_CPU_powerpc7400)
3043 #define BSWAP_LIMB_STORE(dst, limb)                                     \
3044   do {                                                                  \
3045     mp_ptr     __dst = (dst);                                           \
3046     mp_limb_t  __limb = (limb);                                         \
3047     __asm__ ("stwbrx %1, 0, %2"                                         \
3048              : "=m" (*__dst)                                            \
3049              : "r" (__limb),                                            \
3050                "r" (__dst));                                            \
3051   } while (0)
3052 #endif
3053
3054 #if ! defined (BSWAP_LIMB_STORE)
3055 #define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
3056 #endif
3057
3058
3059 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3060 #define MPN_BSWAP(dst, src, size)                       \
3061   do {                                                  \
3062     mp_ptr     __dst = (dst);                           \
3063     mp_srcptr  __src = (src);                           \
3064     mp_size_t  __size = (size);                         \
3065     mp_size_t  __i;                                     \
3066     ASSERT ((size) >= 0);                               \
3067     ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
3068     CRAY_Pragma ("_CRI ivdep");                         \
3069     for (__i = 0; __i < __size; __i++)                  \
3070       {                                                 \
3071         BSWAP_LIMB_FETCH (*__dst, __src);               \
3072         __dst++;                                        \
3073         __src++;                                        \
3074       }                                                 \
3075   } while (0)
3076
3077 /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
3078 #define MPN_BSWAP_REVERSE(dst, src, size)               \
3079   do {                                                  \
3080     mp_ptr     __dst = (dst);                           \
3081     mp_size_t  __size = (size);                         \
3082     mp_srcptr  __src = (src) + __size - 1;              \
3083     mp_size_t  __i;                                     \
3084     ASSERT ((size) >= 0);                               \
3085     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
3086     CRAY_Pragma ("_CRI ivdep");                         \
3087     for (__i = 0; __i < __size; __i++)                  \
3088       {                                                 \
3089         BSWAP_LIMB_FETCH (*__dst, __src);               \
3090         __dst++;                                        \
3091         __src--;                                        \
3092       }                                                 \
3093   } while (0)
3094
3095
3096 /* No processor claiming to be SPARC v9 compliant seems to
3097    implement the POPC instruction.  Disable pattern for now.  */
3098 #if 0
3099 #if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
3100 #define popc_limb(result, input)                                        \
3101   do {                                                                  \
3102     DItype __res;                                                       \
3103     __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));              \
3104   } while (0)
3105 #endif
3106 #endif
3107
3108 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3109 #define popc_limb(result, input)                                        \
3110   do {                                                                  \
3111     __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));             \
3112   } while (0)
3113 #endif
3114
3115 /* Cray intrinsic. */
3116 #ifdef _CRAY
3117 #define popc_limb(result, input)        \
3118   do {                                  \
3119     (result) = _popcnt (input);         \
3120   } while (0)
3121 #endif
3122
3123 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
3124     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3125 #define popc_limb(result, input)                                        \
3126   do {                                                                  \
3127     __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));           \
3128   } while (0)
3129 #endif
3130
3131 /* Cool population count of an mp_limb_t.
3132    You have to figure out how this works, We won't tell you!
3133
3134    The constants could also be expressed as:
3135      0x55... = [2^N / 3]     = [(2^N-1)/3]
3136      0x33... = [2^N / 5]     = [(2^N-1)/5]
3137      0x0f... = [2^N / 17]    = [(2^N-1)/17]
3138      (N is GMP_LIMB_BITS, [] denotes truncation.) */
3139
3140 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3141 #define popc_limb(result, input)                                        \
3142   do {                                                                  \
3143     mp_limb_t  __x = (input);                                           \
3144     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3145     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3146     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3147     (result) = __x & 0xff;                                              \
3148   } while (0)
3149 #endif
3150
3151 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3152 #define popc_limb(result, input)                                        \
3153   do {                                                                  \
3154     mp_limb_t  __x = (input);                                           \
3155     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3156     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3157     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3158     __x = ((__x >> 8) + __x);                                           \
3159     (result) = __x & 0xff;                                              \
3160   } while (0)
3161 #endif
3162
3163 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3164 #define popc_limb(result, input)                                        \
3165   do {                                                                  \
3166     mp_limb_t  __x = (input);                                           \
3167     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3168     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3169     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3170     __x = ((__x >> 8) + __x);                                           \
3171     __x = ((__x >> 16) + __x);                                          \
3172     (result) = __x & 0xff;                                              \
3173   } while (0)
3174 #endif
3175
3176 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3177 #define popc_limb(result, input)                                        \
3178   do {                                                                  \
3179     mp_limb_t  __x = (input);                                           \
3180     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
3181     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
3182     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
3183     __x = ((__x >> 8) + __x);                                           \
3184     __x = ((__x >> 16) + __x);                                          \
3185     __x = ((__x >> 32) + __x);                                          \
3186     (result) = __x & 0xff;                                              \
3187   } while (0)
3188 #endif
3189
3190
3191 /* Define stuff for longlong.h.  */
3192 #if HAVE_ATTRIBUTE_MODE
3193 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
3194 typedef          int SItype     __attribute__ ((mode (SI)));
3195 typedef unsigned int USItype    __attribute__ ((mode (SI)));
3196 typedef          int DItype     __attribute__ ((mode (DI)));
3197 typedef unsigned int UDItype    __attribute__ ((mode (DI)));
3198 #else
3199 typedef unsigned char UQItype;
3200 typedef          long SItype;
3201 typedef unsigned long USItype;
3202 #if HAVE_LONG_LONG
3203 typedef long long int DItype;
3204 typedef unsigned long long int UDItype;
3205 #else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
3206 typedef long int DItype;
3207 typedef unsigned long int UDItype;
3208 #endif
3209 #endif
3210
3211 typedef mp_limb_t UWtype;
3212 typedef unsigned int UHWtype;
3213 #define W_TYPE_SIZE BITS_PER_MP_LIMB
3214
3215 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3216
3217    Bit field packing is "implementation defined" according to C99, which
3218    leaves us at the compiler's mercy here.  For some systems packing is
3219    defined in the ABI (eg. x86).  In any case so far it seems universal that
3220    little endian systems pack from low to high, and big endian from high to
3221    low within the given type.
3222
3223    Within the fields we rely on the integer endianness being the same as the
3224    float endianness, this is true everywhere we know of and it'd be a fairly
3225    strange system that did anything else.  */
3226
3227 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3228 #define _GMP_IEEE_FLOATS 1
3229 union ieee_double_extract
3230 {
3231   struct
3232     {
3233       gmp_uint_least32_t manh:20;
3234       gmp_uint_least32_t exp:11;
3235       gmp_uint_least32_t sig:1;
3236       gmp_uint_least32_t manl:32;
3237     } s;
3238   double d;
3239 };
3240 #endif
3241
3242 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3243 #define _GMP_IEEE_FLOATS 1
3244 union ieee_double_extract
3245 {
3246   struct
3247     {
3248       gmp_uint_least32_t manl:32;
3249       gmp_uint_least32_t manh:20;
3250       gmp_uint_least32_t exp:11;
3251       gmp_uint_least32_t sig:1;
3252     } s;
3253   double d;
3254 };
3255 #endif
3256
3257 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3258 #define _GMP_IEEE_FLOATS 1
3259 union ieee_double_extract
3260 {
3261   struct
3262     {
3263       gmp_uint_least32_t sig:1;
3264       gmp_uint_least32_t exp:11;
3265       gmp_uint_least32_t manh:20;
3266       gmp_uint_least32_t manl:32;
3267     } s;
3268   double d;
3269 };
3270 #endif
3271
3272
3273 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3274    that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
3275 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3276 /* Maximum number of limbs it will take to store any `double'.
3277    We assume doubles have 53 mantissa bits.  */
3278 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3279
3280 int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
3281
3282 #define mpn_get_d __gmpn_get_d
3283 double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
3284
3285
3286 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3287    a_inf if x is an infinity.  Both are considered unlikely values, for
3288    branch prediction.  */
3289
3290 #if _GMP_IEEE_FLOATS
3291 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
3292   do {                                          \
3293     union ieee_double_extract  u;               \
3294     u.d = (x);                                  \
3295     if (UNLIKELY (u.s.exp == 0x7FF))            \
3296       {                                         \
3297         if (u.s.manl == 0 && u.s.manh == 0)     \
3298           { a_inf; }                            \
3299         else                                    \
3300           { a_nan; }                            \
3301       }                                         \
3302   } while (0)
3303 #endif
3304
3305 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3306 /* no nans or infs in these formats */
3307 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
3308   do { } while (0)
3309 #endif
3310
3311 #ifndef DOUBLE_NAN_INF_ACTION
3312 /* Unknown format, try something generic.
3313    NaN should be "unordered", so x!=x.
3314    Inf should be bigger than DBL_MAX.  */
3315 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
3316   do {                                                          \
3317     {                                                           \
3318       if (UNLIKELY ((x) != (x)))                                \
3319         { a_nan; }                                              \
3320       else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))      \
3321         { a_inf; }                                              \
3322     }                                                           \
3323   } while (0)
3324 #endif
3325
3326 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3327    in the coprocessor, which means a bigger exponent range than normal, and
3328    depending on the rounding mode, a bigger mantissa than normal.  (See
3329    "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
3330    "d" through memory to force any rounding and overflows to occur.
3331
3332    On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3333    registers, where there's no such extra precision and no need for the
3334    FORCE_DOUBLE.  We don't bother to detect this since the present uses for
3335    FORCE_DOUBLE are only in test programs and default generic C code.
3336
3337    Not quite sure that an "automatic volatile" will use memory, but it does
3338    in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3339    apparently matching operands like "0" are only allowed on a register
3340    output.  gcc 3.4 warns about this, though in fact it and past versions
3341    seem to put the operand through memory as hoped.  */
3342
3343 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
3344      || defined (__amd64__))
3345 #define FORCE_DOUBLE(d) \
3346   do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3347 #else
3348 #define FORCE_DOUBLE(d)  do { } while (0)
3349 #endif
3350
3351
3352 extern int __gmp_junk;
3353 extern const int __gmp_0;
3354 void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
3355 void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3356 void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3357 void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3358 #define GMP_ERROR(code)   __gmp_exception (code)
3359 #define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
3360 #define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
3361
3362 #if defined _LONG_LONG_LIMB
3363 #if __GMP_HAVE_TOKEN_PASTE
3364 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3365 #else
3366 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
3367 #endif
3368 #else /* not _LONG_LONG_LIMB */
3369 #if __GMP_HAVE_TOKEN_PASTE
3370 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3371 #else
3372 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
3373 #endif
3374 #endif /* _LONG_LONG_LIMB */
3375
3376 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3377 #if GMP_NUMB_BITS == 2
3378 #define PP 0x3                                  /* 3 */
3379 #define PP_FIRST_OMITTED 5
3380 #endif
3381 #if GMP_NUMB_BITS == 4
3382 #define PP 0xF                                  /* 3 x 5 */
3383 #define PP_FIRST_OMITTED 7
3384 #endif
3385 #if GMP_NUMB_BITS == 8
3386 #define PP 0x69                                 /* 3 x 5 x 7 */
3387 #define PP_FIRST_OMITTED 11
3388 #endif
3389 #if GMP_NUMB_BITS == 16
3390 #define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
3391 #define PP_FIRST_OMITTED 17
3392 #endif
3393 #if GMP_NUMB_BITS == 32
3394 #define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
3395 #define PP_INVERTED 0x53E5645CL
3396 #define PP_FIRST_OMITTED 31
3397 #endif
3398 #if GMP_NUMB_BITS == 64
3399 #define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
3400 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3401 #define PP_FIRST_OMITTED 59
3402 #endif
3403 #ifndef PP_FIRST_OMITTED
3404 #define PP_FIRST_OMITTED 3
3405 #endif
3406
3407
3408
3409 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3410    zero bit representing +1 and a one bit representing -1.  Bits other than
3411    bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
3412    used to ensure the expressions are "int"s even if a and/or b might be
3413    other types.
3414
3415    JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3416    and their speed is important.  Expressions are used rather than
3417    conditionals to accumulate sign changes, which effectively means XORs
3418    instead of conditional JUMPs. */
3419
3420 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3421 #define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
3422
3423 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3424 #define JACOBI_U0(a)   ((a) == 1)
3425
3426 /* (a/0), with a given by low and size;
3427    is 1 if a=+/-1, 0 otherwise */
3428 #define JACOBI_LS0(alow,asize) \
3429   (((asize) == 1 || (asize) == -1) && (alow) == 1)
3430
3431 /* (a/0), with a an mpz_t;
3432    fetch of low limb always valid, even if size is zero */
3433 #define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
3434
3435 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3436 #define JACOBI_0U(b)   ((b) == 1)
3437
3438 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3439 #define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
3440
3441 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3442 #define JACOBI_0LS(blow,bsize) \
3443   (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3444
3445 /* Convert a bit1 to +1 or -1. */
3446 #define JACOBI_BIT1_TO_PN(result_bit1) \
3447   (1 - ((int) (result_bit1) & 2))
3448
3449 /* (2/b), with b unsigned and odd;
3450    is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3451    hence obtained from (b>>1)^b */
3452 #define JACOBI_TWO_U_BIT1(b) \
3453   ((int) (((b) >> 1) ^ (b)))
3454
3455 /* (2/b)^twos, with b unsigned and odd */
3456 #define JACOBI_TWOS_U_BIT1(twos, b) \
3457   ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3458
3459 /* (2/b)^twos, with b unsigned and odd */
3460 #define JACOBI_TWOS_U(twos, b) \
3461   (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3462
3463 /* (-1/b), with b odd (signed or unsigned);
3464    is (-1)^((b-1)/2) */
3465 #define JACOBI_N1B_BIT1(b) \
3466   ((int) (b))
3467
3468 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3469    is (-1/b) if a<0, or +1 if a>=0 */
3470 #define JACOBI_ASGN_SU_BIT1(a, b) \
3471   ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3472
3473 /* (a/b) effect due to sign of b: signed/signed;
3474    is -1 if a and b both negative, +1 otherwise */
3475 #define JACOBI_BSGN_SS_BIT1(a, b) \
3476   ((((a)<0) & ((b)<0)) << 1)
3477
3478 /* (a/b) effect due to sign of b: signed/mpz;
3479    is -1 if a and b both negative, +1 otherwise */
3480 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3481   JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3482
3483 /* (a/b) effect due to sign of b: mpz/signed;
3484    is -1 if a and b both negative, +1 otherwise */
3485 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3486   JACOBI_BSGN_SZ_BIT1 (b, a)
3487
3488 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3489    is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3490    both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
3491    because this is used in a couple of places with only bit 1 of a or b
3492    valid. */
3493 #define JACOBI_RECIP_UU_BIT1(a, b) \
3494   ((int) ((a) & (b)))
3495
3496 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3497    decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
3498    updated for the new b_ptr.  result_bit1 is updated according to the
3499    factors of 2 stripped, as per (a/2).  */
3500 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
3501   do {                                                                  \
3502     ASSERT ((b_size) >= 1);                                             \
3503     ASSERT ((b_low) == (b_ptr)[0]);                                     \
3504                                                                         \
3505     while (UNLIKELY ((b_low) == 0))                                     \
3506       {                                                                 \
3507         (b_size)--;                                                     \
3508         ASSERT ((b_size) >= 1);                                         \
3509         (b_ptr)++;                                                      \
3510         (b_low) = *(b_ptr);                                             \
3511                                                                         \
3512         ASSERT (((a) & 1) != 0);                                        \
3513         if ((GMP_NUMB_BITS % 2) == 1)                                   \
3514           (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
3515       }                                                                 \
3516   } while (0)
3517
3518 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3519    modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
3520    unsigned.  modexact_1_odd effectively calculates -a mod b, and
3521    result_bit1 is adjusted for the factor of -1.
3522
3523    The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3524    sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3525    factor to introduce into result_bit1, so for that case use mpn_mod_1
3526    unconditionally.
3527
3528    FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3529    for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
3530    or not skip a divide step, or something. */
3531
3532 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3533   do {                                                                     \
3534     mp_srcptr  __a_ptr  = (a_ptr);                                         \
3535     mp_size_t  __a_size = (a_size);                                        \
3536     mp_limb_t  __b      = (b);                                             \
3537                                                                            \
3538     ASSERT (__a_size >= 1);                                                \
3539     ASSERT (__b & 1);                                                      \
3540                                                                            \
3541     if ((GMP_NUMB_BITS % 2) != 0                                           \
3542         || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
3543       {                                                                    \
3544         (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
3545       }                                                                    \
3546     else                                                                   \
3547       {                                                                    \
3548         (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
3549         (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
3550       }                                                                    \
3551   } while (0)
3552
3553 /* Matrix multiplication */
3554 #define   mpn_matrix22_mul __MPN(matrix22_mul)
3555 void      mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3556 #define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
3557 void      mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3558 #define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
3559 mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
3560
3561 #ifndef MATRIX22_STRASSEN_THRESHOLD
3562 #define MATRIX22_STRASSEN_THRESHOLD 30
3563 #endif
3564
3565 /* HGCD definitions */
3566
3567 /* Extract one numb, shifting count bits left
3568     ________  ________
3569    |___xh___||___xl___|
3570           |____r____|
3571    >count <
3572
3573    The count includes any nail bits, so it should work fine if count
3574    is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
3575    xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
3576
3577    FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
3578    those calls where the count high bits of xh may be non-zero.
3579 */
3580
3581 #define MPN_EXTRACT_NUMB(count, xh, xl)                         \
3582   ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |      \
3583    ((xl) >> (GMP_LIMB_BITS - (count))))
3584
3585
3586 /* The matrix non-negative M = (u, u'; v,v') keeps track of the
3587    reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
3588    than a, b. The determinant must always be one, so that M has an
3589    inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
3590    bits. */
3591 struct hgcd_matrix1
3592 {
3593   mp_limb_t u[2][2];
3594 };
3595
3596 #define mpn_hgcd2 __MPN (hgcd2)
3597 int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
3598
3599 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
3600 mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3601
3602 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
3603 mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3604
3605 struct hgcd_matrix
3606 {
3607   mp_size_t alloc;              /* for sanity checking only */
3608   mp_size_t n;
3609   mp_ptr p[2][2];
3610 };
3611
3612 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3613
3614 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3615 void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3616
3617 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3618 void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3619
3620 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3621 mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3622
3623 #define mpn_hgcd_itch __MPN (hgcd_itch)
3624 mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3625
3626 #define mpn_hgcd __MPN (hgcd)
3627 mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3628
3629 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3630
3631 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3632 mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3633
3634 /* Needs storage for the quotient */
3635 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
3636
3637 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
3638 mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3639
3640 #define MPN_GCD_LEHMER_N_ITCH(n) (n)
3641
3642 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
3643 mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3644
3645 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
3646 mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
3647
3648 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
3649
3650 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
3651 mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3652
3653 /* 4*(an + 1) + 4*(bn + 1) + an */
3654 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
3655
3656 #ifndef HGCD_THRESHOLD
3657 #define HGCD_THRESHOLD 400
3658 #endif
3659
3660 #ifndef GCD_DC_THRESHOLD
3661 #define GCD_DC_THRESHOLD 1000
3662 #endif
3663
3664 #ifndef GCDEXT_DC_THRESHOLD
3665 #define GCDEXT_DC_THRESHOLD 600
3666 #endif
3667
3668 /* Definitions for mpn_set_str and mpn_get_str */
3669 struct powers
3670 {
3671   mp_ptr p;                     /* actual power value */
3672   mp_size_t n;                  /* # of limbs at p */
3673   mp_size_t shift;              /* weight of lowest limb, in limb base B */
3674   size_t digits_in_base;        /* number of corresponding digits */
3675   int base;
3676 };
3677 typedef struct powers powers_t;
3678 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
3679 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
3680 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
3681 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
3682
3683 #define   mpn_dc_set_str __MPN(dc_set_str)
3684 mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
3685 #define   mpn_bc_set_str __MPN(bc_set_str)
3686 mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
3687 #define   mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
3688 void      mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
3689
3690
3691 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3692    limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
3693    hence giving back the user's size in bits rounded up.  Notice that
3694    converting prec->bits->prec gives an unchanged value.  */
3695 #define __GMPF_BITS_TO_PREC(n)                                          \
3696   ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3697 #define __GMPF_PREC_TO_BITS(n) \
3698   ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
3699
3700 extern mp_size_t __gmp_default_fp_limb_precision;
3701
3702
3703 /* Set n to the number of significant digits an mpf of the given _mp_prec
3704    field, in the given base.  This is a rounded up value, designed to ensure
3705    there's enough digits to reproduce all the guaranteed part of the value.
3706
3707    There are prec many limbs, but the high might be only "1" so forget it
3708    and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
3709    further +1 is because the limbs usually won't fall on digit boundaries.
3710
3711    FIXME: If base is a power of 2 and the bits per digit divides
3712    BITS_PER_MP_LIMB then the +2 is unnecessary.  This happens always for
3713    base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3714
3715 #define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
3716   do {                                                                  \
3717     ASSERT (base >= 2 && base < numberof (mp_bases));                   \
3718     (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \
3719                         * mp_bases[(base)].chars_per_bit_exactly);      \
3720   } while (0)
3721
3722
3723 /* Decimal point string, from the current C locale.  Needs <langinfo.h> for
3724    nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
3725    DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3726    their respective #if HAVE_FOO_H.
3727
3728    GLIBC recommends nl_langinfo because getting only one facet can be
3729    faster, apparently. */
3730
3731 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3732 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3733 #define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
3734 #endif
3735 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3736 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3737 #define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
3738 #endif
3739 /* localeconv is slower since it returns all locale stuff */
3740 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3741 #define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
3742 #endif
3743 #if ! defined (GMP_DECIMAL_POINT)
3744 #define GMP_DECIMAL_POINT  (".")
3745 #endif
3746
3747
3748 #define DOPRNT_CONV_FIXED        1
3749 #define DOPRNT_CONV_SCIENTIFIC   2
3750 #define DOPRNT_CONV_GENERAL      3
3751
3752 #define DOPRNT_JUSTIFY_NONE      0
3753 #define DOPRNT_JUSTIFY_LEFT      1
3754 #define DOPRNT_JUSTIFY_RIGHT     2
3755 #define DOPRNT_JUSTIFY_INTERNAL  3
3756
3757 #define DOPRNT_SHOWBASE_YES      1
3758 #define DOPRNT_SHOWBASE_NO       2
3759 #define DOPRNT_SHOWBASE_NONZERO  3
3760
3761 struct doprnt_params_t {
3762   int         base;          /* negative for upper case */
3763   int         conv;          /* choices above */
3764   const char  *expfmt;       /* exponent format */
3765   int         exptimes4;     /* exponent multiply by 4 */
3766   char        fill;          /* character */
3767   int         justify;       /* choices above */
3768   int         prec;          /* prec field, or -1 for all digits */
3769   int         showbase;      /* choices above */
3770   int         showpoint;     /* if radix point always shown */
3771   int         showtrailing;  /* if trailing zeros wanted */
3772   char        sign;          /* '+', ' ', or '\0' */
3773   int         width;         /* width field */
3774 };
3775
3776 #if _GMP_H_HAVE_VA_LIST
3777
3778 typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
3779 typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
3780 typedef int (*doprnt_reps_t)   __GMP_PROTO ((void *, int, int));
3781 typedef int (*doprnt_final_t)  __GMP_PROTO ((void *));
3782
3783 struct doprnt_funs_t {
3784   doprnt_format_t  format;
3785   doprnt_memory_t  memory;
3786   doprnt_reps_t    reps;
3787   doprnt_final_t   final;   /* NULL if not required */
3788 };
3789
3790 extern const struct doprnt_funs_t  __gmp_fprintf_funs;
3791 extern const struct doprnt_funs_t  __gmp_sprintf_funs;
3792 extern const struct doprnt_funs_t  __gmp_snprintf_funs;
3793 extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
3794 extern const struct doprnt_funs_t  __gmp_ostream_funs;
3795
3796 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
3797    "size" of these have been written.  "alloc > size" is maintained, so
3798    there's room to store a '\0' at the end.  "result" is where the
3799    application wants the final block pointer.  */
3800 struct gmp_asprintf_t {
3801   char    **result;
3802   char    *buf;
3803   size_t  size;
3804   size_t  alloc;
3805 };
3806
3807 #define GMP_ASPRINTF_T_INIT(d, output)                          \
3808   do {                                                          \
3809     (d).result = (output);                                      \
3810     (d).alloc = 256;                                            \
3811     (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
3812     (d).size = 0;                                               \
3813   } while (0)
3814
3815 /* If a realloc is necessary, use twice the size actually required, so as to
3816    avoid repeated small reallocs.  */
3817 #define GMP_ASPRINTF_T_NEED(d, n)                                       \
3818   do {                                                                  \
3819     size_t  alloc, newsize, newalloc;                                   \
3820     ASSERT ((d)->alloc >= (d)->size + 1);                               \
3821                                                                         \
3822     alloc = (d)->alloc;                                                 \
3823     newsize = (d)->size + (n);                                          \
3824     if (alloc <= newsize)                                               \
3825       {                                                                 \
3826         newalloc = 2*newsize;                                           \
3827         (d)->alloc = newalloc;                                          \
3828         (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
3829                                                alloc, newalloc, char);  \
3830       }                                                                 \
3831   } while (0)
3832
3833 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
3834 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
3835 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
3836
3837 /* buf is where to write the next output, and size is how much space is left
3838    there.  If the application passed size==0 then that's what we'll have
3839    here, and nothing at all should be written.  */
3840 struct gmp_snprintf_t {
3841   char    *buf;
3842   size_t  size;
3843 };
3844
3845 /* Add the bytes printed by the call to the total retval, or bail out on an
3846    error.  */
3847 #define DOPRNT_ACCUMULATE(call) \
3848   do {                          \
3849     int  __ret;                 \
3850     __ret = call;               \
3851     if (__ret == -1)            \
3852       goto error;               \
3853     retval += __ret;            \
3854   } while (0)
3855 #define DOPRNT_ACCUMULATE_FUN(fun, params)      \
3856   do {                                          \
3857     ASSERT ((fun) != NULL);                     \
3858     DOPRNT_ACCUMULATE ((*(fun)) params);        \
3859   } while (0)
3860
3861 #define DOPRNT_FORMAT(fmt, ap)                          \
3862   DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
3863 #define DOPRNT_MEMORY(ptr, len)                                 \
3864   DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
3865 #define DOPRNT_REPS(c, n)                               \
3866   DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
3867
3868 #define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
3869
3870 #define DOPRNT_REPS_MAYBE(c, n) \
3871   do {                          \
3872     if ((n) != 0)               \
3873       DOPRNT_REPS (c, n);       \
3874   } while (0)
3875 #define DOPRNT_MEMORY_MAYBE(ptr, len)   \
3876   do {                                  \
3877     if ((len) != 0)                     \
3878       DOPRNT_MEMORY (ptr, len);         \
3879   } while (0)
3880
3881 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
3882 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
3883
3884 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
3885 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
3886
3887 int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
3888 #endif /* _GMP_H_HAVE_VA_LIST */
3889
3890
3891 typedef int (*gmp_doscan_scan_t)  __GMP_PROTO ((void *, const char *, ...));
3892 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
3893 typedef int (*gmp_doscan_get_t)   __GMP_PROTO ((void *));
3894 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
3895
3896 struct gmp_doscan_funs_t {
3897   gmp_doscan_scan_t   scan;
3898   gmp_doscan_step_t   step;
3899   gmp_doscan_get_t    get;
3900   gmp_doscan_unget_t  unget;
3901 };
3902 extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
3903 extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
3904
3905 #if _GMP_H_HAVE_VA_LIST
3906 int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *,
3907                           const char *, va_list));
3908 #endif
3909
3910
3911 /* For testing and debugging.  */
3912 #define MPZ_CHECK_FORMAT(z)                                     \
3913   do {                                                          \
3914     ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
3915     ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
3916     ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
3917   } while (0)
3918
3919 #define MPQ_CHECK_FORMAT(q)                             \
3920   do {                                                  \
3921     MPZ_CHECK_FORMAT (mpq_numref (q));                  \
3922     MPZ_CHECK_FORMAT (mpq_denref (q));                  \
3923     ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
3924                                                         \
3925     if (SIZ(mpq_numref(q)) == 0)                        \
3926       {                                                 \
3927         /* should have zero as 0/1 */                   \
3928         ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
3929                        && PTR(mpq_denref(q))[0] == 1);  \
3930       }                                                 \
3931     else                                                \
3932       {                                                 \
3933         /* should have no common factors */             \
3934         mpz_t  g;                                       \
3935         mpz_init (g);                                   \
3936         mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
3937         ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
3938         mpz_clear (g);                                  \
3939       }                                                 \
3940   } while (0)
3941
3942 #define MPF_CHECK_FORMAT(f)                             \
3943   do {                                                  \
3944     ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
3945     ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
3946     if (SIZ(f) == 0)                                    \
3947       ASSERT_ALWAYS (EXP(f) == 0);                      \
3948     if (SIZ(f) != 0)                                    \
3949       ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
3950   } while (0)
3951
3952
3953 #define MPZ_PROVOKE_REALLOC(z)                                  \
3954   do { ALLOC(z) = ABSIZ(z); } while (0)
3955
3956
3957 /* Enhancement: The "mod" and "gcd_1" functions below could have
3958    __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
3959    function pointers, only actual functions.  It probably doesn't make much
3960    difference to the gmp code, since hopefully we arrange calls so there's
3961    no great need for the compiler to move things around.  */
3962
3963 #if WANT_FAT_BINARY && HAVE_HOST_CPU_FAMILY_x86
3964 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
3965    in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
3966 struct cpuvec_t {
3967   DECL_add_n           ((*add_n));
3968   DECL_addmul_1        ((*addmul_1));
3969   DECL_copyd           ((*copyd));
3970   DECL_copyi           ((*copyi));
3971   DECL_divexact_1      ((*divexact_1));
3972   DECL_divexact_by3c   ((*divexact_by3c));
3973   DECL_divrem_1        ((*divrem_1));
3974   DECL_gcd_1           ((*gcd_1));
3975   DECL_lshift          ((*lshift));
3976   DECL_mod_1           ((*mod_1));
3977   DECL_mod_34lsub1     ((*mod_34lsub1));
3978   DECL_modexact_1c_odd ((*modexact_1c_odd));
3979   DECL_mul_1           ((*mul_1));
3980   DECL_mul_basecase    ((*mul_basecase));
3981   DECL_preinv_divrem_1 ((*preinv_divrem_1));
3982   DECL_preinv_mod_1    ((*preinv_mod_1));
3983   DECL_rshift          ((*rshift));
3984   DECL_sqr_basecase    ((*sqr_basecase));
3985   DECL_sub_n           ((*sub_n));
3986   DECL_submul_1        ((*submul_1));
3987   int                  initialized;
3988   mp_size_t            mul_karatsuba_threshold;
3989   mp_size_t            mul_toom3_threshold;
3990   mp_size_t            sqr_karatsuba_threshold;
3991   mp_size_t            sqr_toom3_threshold;
3992 };
3993 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
3994 #endif /* x86 fat binary */
3995
3996 void __gmpn_cpuvec_init __GMP_PROTO ((void));
3997
3998 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
3999    if that hasn't yet been done (to establish the right values).  */
4000 #define CPUVEC_THRESHOLD(field)                                               \
4001   ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
4002    __gmpn_cpuvec.field)
4003
4004
4005 #if HAVE_NATIVE_mpn_add_nc
4006 #define mpn_add_nc __MPN(add_nc)
4007 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4008 #else
4009 static inline
4010 mp_limb_t
4011 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4012 {
4013   mp_limb_t co;
4014   co = mpn_add_n (rp, up, vp, n);
4015   co += mpn_add_1 (rp, rp, n, ci);
4016   return co;
4017 }
4018 #endif
4019
4020 #if HAVE_NATIVE_mpn_sub_nc
4021 #define mpn_sub_nc __MPN(sub_nc)
4022 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4023 #else
4024 static inline mp_limb_t
4025 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4026 {
4027   mp_limb_t co;
4028   co = mpn_sub_n (rp, up, vp, n);
4029   co += mpn_sub_1 (rp, rp, n, ci);
4030   return co;
4031 }
4032 #endif
4033
4034 static inline int
4035 mpn_zero_p (mp_srcptr ap, mp_size_t n)
4036 {
4037   mp_size_t i;
4038   for (i = n - 1; i >= 0; i--)
4039     {
4040       if (ap[i] != 0)
4041         return 0;
4042     }
4043   return 1;
4044 }
4045
4046 #if TUNE_PROGRAM_BUILD
4047 /* Some extras wanted when recompiling some .c files for use by the tune
4048    program.  Not part of a normal build.
4049
4050    It's necessary to keep these thresholds as #defines (just to an
4051    identically named variable), since various defaults are established based
4052    on #ifdef in the .c files.  For some this is not so (the defaults are
4053    instead established above), but all are done this way for consistency. */
4054
4055 #undef  MUL_KARATSUBA_THRESHOLD
4056 #define MUL_KARATSUBA_THRESHOLD      mul_karatsuba_threshold
4057 extern mp_size_t                     mul_karatsuba_threshold;
4058
4059 #undef  MUL_TOOM3_THRESHOLD
4060 #define MUL_TOOM3_THRESHOLD          mul_toom3_threshold
4061 extern mp_size_t                     mul_toom3_threshold;
4062
4063 #undef  MUL_TOOM44_THRESHOLD
4064 #define MUL_TOOM44_THRESHOLD         mul_toom44_threshold
4065 extern mp_size_t                     mul_toom44_threshold;
4066
4067 #undef  MUL_FFT_THRESHOLD
4068 #define MUL_FFT_THRESHOLD            mul_fft_threshold
4069 extern mp_size_t                     mul_fft_threshold;
4070
4071 #undef  MUL_FFT_MODF_THRESHOLD
4072 #define MUL_FFT_MODF_THRESHOLD       mul_fft_modf_threshold
4073 extern mp_size_t                     mul_fft_modf_threshold;
4074
4075 #undef  MUL_FFT_TABLE
4076 #define MUL_FFT_TABLE                { 0 }
4077
4078 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4079    remain as zero (always use it). */
4080 #if ! HAVE_NATIVE_mpn_sqr_basecase
4081 #undef  SQR_BASECASE_THRESHOLD
4082 #define SQR_BASECASE_THRESHOLD       sqr_basecase_threshold
4083 extern mp_size_t                     sqr_basecase_threshold;
4084 #endif
4085
4086 #if TUNE_PROGRAM_BUILD_SQR
4087 #undef  SQR_KARATSUBA_THRESHOLD
4088 #define SQR_KARATSUBA_THRESHOLD      SQR_KARATSUBA_MAX_GENERIC
4089 #else
4090 #undef  SQR_KARATSUBA_THRESHOLD
4091 #define SQR_KARATSUBA_THRESHOLD      sqr_karatsuba_threshold
4092 extern mp_size_t                     sqr_karatsuba_threshold;
4093 #endif
4094
4095 #undef  SQR_TOOM3_THRESHOLD
4096 #define SQR_TOOM3_THRESHOLD          sqr_toom3_threshold
4097 extern mp_size_t                     sqr_toom3_threshold;
4098
4099 #undef  SQR_TOOM4_THRESHOLD
4100 #define SQR_TOOM4_THRESHOLD          sqr_toom4_threshold
4101 extern mp_size_t                     sqr_toom4_threshold;
4102
4103 #undef SQR_FFT_THRESHOLD
4104 #define SQR_FFT_THRESHOLD            sqr_fft_threshold
4105 extern mp_size_t                     sqr_fft_threshold;
4106
4107 #undef SQR_FFT_MODF_THRESHOLD
4108 #define SQR_FFT_MODF_THRESHOLD       sqr_fft_modf_threshold
4109 extern mp_size_t                     sqr_fft_modf_threshold;
4110
4111 #undef  SQR_FFT_TABLE
4112 #define SQR_FFT_TABLE                { 0 }
4113
4114 #undef  MULLOW_BASECASE_THRESHOLD
4115 #define MULLOW_BASECASE_THRESHOLD    mullow_basecase_threshold
4116 extern mp_size_t                     mullow_basecase_threshold;
4117
4118 #undef  MULLOW_DC_THRESHOLD
4119 #define MULLOW_DC_THRESHOLD          mullow_dc_threshold
4120 extern mp_size_t                     mullow_dc_threshold;
4121
4122 #undef  MULLOW_MUL_N_THRESHOLD
4123 #define MULLOW_MUL_N_THRESHOLD       mullow_mul_n_threshold
4124 extern mp_size_t                     mullow_mul_n_threshold;
4125
4126
4127 #if ! UDIV_PREINV_ALWAYS
4128 #undef  DIV_SB_PREINV_THRESHOLD
4129 #define DIV_SB_PREINV_THRESHOLD      div_sb_preinv_threshold
4130 extern mp_size_t                     div_sb_preinv_threshold;
4131 #endif
4132
4133 #undef  DIV_DC_THRESHOLD
4134 #define DIV_DC_THRESHOLD             div_dc_threshold
4135 extern mp_size_t                     div_dc_threshold;
4136
4137 #undef  POWM_THRESHOLD
4138 #define POWM_THRESHOLD               powm_threshold
4139 extern mp_size_t                     powm_threshold;
4140
4141 #undef  MATRIX22_STRASSEN_THRESHOLD
4142 #define MATRIX22_STRASSEN_THRESHOLD  matrix22_strassen_threshold
4143 extern mp_size_t                     matrix22_strassen_threshold;
4144
4145 #undef  HGCD_THRESHOLD
4146 #define HGCD_THRESHOLD               hgcd_threshold
4147 extern mp_size_t                     hgcd_threshold;
4148
4149 #undef  GCD_ACCEL_THRESHOLD
4150 #define GCD_ACCEL_THRESHOLD          gcd_accel_threshold
4151 extern mp_size_t                     gcd_accel_threshold;
4152
4153 #undef  GCD_DC_THRESHOLD
4154 #define GCD_DC_THRESHOLD             gcd_dc_threshold
4155 extern mp_size_t                     gcd_dc_threshold;
4156
4157 #undef GCDEXT_DC_THRESHOLD
4158 #define GCDEXT_DC_THRESHOLD          gcdext_dc_threshold
4159 extern mp_size_t                     gcdext_dc_threshold;
4160
4161 #undef DIVREM_1_NORM_THRESHOLD
4162 #define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold
4163 extern mp_size_t                     divrem_1_norm_threshold;
4164
4165 #undef DIVREM_1_UNNORM_THRESHOLD
4166 #define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold
4167 extern mp_size_t                     divrem_1_unnorm_threshold;
4168
4169 #undef MOD_1_NORM_THRESHOLD
4170 #define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold
4171 extern mp_size_t                     mod_1_norm_threshold;
4172
4173 #undef MOD_1_UNNORM_THRESHOLD
4174 #define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold
4175 extern mp_size_t                     mod_1_unnorm_threshold;
4176
4177 #undef MOD_1_1_THRESHOLD
4178 #define MOD_1_1_THRESHOLD            mod_1_1_threshold
4179 extern mp_size_t                     mod_1_1_threshold;
4180
4181 #undef MOD_1_2_THRESHOLD
4182 #define MOD_1_2_THRESHOLD            mod_1_2_threshold
4183 extern mp_size_t                     mod_1_2_threshold;
4184
4185 #undef MOD_1_3_THRESHOLD
4186 #define MOD_1_3_THRESHOLD            mod_1_3_threshold
4187 extern mp_size_t                     mod_1_3_threshold;
4188
4189 #undef MOD_1_4_THRESHOLD
4190 #define MOD_1_4_THRESHOLD            mod_1_4_threshold
4191 extern mp_size_t                     mod_1_4_threshold;
4192
4193 #if ! UDIV_PREINV_ALWAYS
4194 #undef  DIVREM_2_THRESHOLD
4195 #define DIVREM_2_THRESHOLD           divrem_2_threshold
4196 extern mp_size_t                     divrem_2_threshold;
4197 #endif
4198
4199 #undef  GET_STR_DC_THRESHOLD
4200 #define GET_STR_DC_THRESHOLD         get_str_dc_threshold
4201 extern mp_size_t                     get_str_dc_threshold;
4202
4203 #undef GET_STR_PRECOMPUTE_THRESHOLD
4204 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
4205 extern mp_size_t                     get_str_precompute_threshold;
4206
4207 #undef  SET_STR_DC_THRESHOLD
4208 #define SET_STR_DC_THRESHOLD         set_str_dc_threshold
4209 extern mp_size_t                     set_str_dc_threshold;
4210
4211 #undef SET_STR_PRECOMPUTE_THRESHOLD
4212 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
4213 extern mp_size_t                     set_str_precompute_threshold;
4214
4215 #undef  SET_STR_THRESHOLD
4216 #define SET_STR_THRESHOLD            set_str_threshold
4217 extern mp_size_t                     SET_STR_THRESHOLD;
4218
4219 #undef  FFT_TABLE_ATTRS
4220 #define FFT_TABLE_ATTRS
4221 extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
4222
4223 /* Sizes the tune program tests up to, used in a couple of recompilations. */
4224 #undef MUL_KARATSUBA_THRESHOLD_LIMIT
4225 #undef MUL_TOOM3_THRESHOLD_LIMIT
4226 #undef MULLOW_BASECASE_THRESHOLD_LIMIT
4227 #undef SQR_TOOM3_THRESHOLD_LIMIT
4228 #define SQR_KARATSUBA_MAX_GENERIC       200
4229 #define MUL_KARATSUBA_THRESHOLD_LIMIT   700
4230 #define MUL_TOOM3_THRESHOLD_LIMIT       700
4231 #define SQR_TOOM3_THRESHOLD_LIMIT       400
4232 #define MUL_TOOM44_THRESHOLD_LIMIT     1000
4233 #define SQR_TOOM4_THRESHOLD_LIMIT      1000
4234 #define MULLOW_BASECASE_THRESHOLD_LIMIT 200
4235 #define GET_STR_THRESHOLD_LIMIT         150
4236
4237 /* "thresh" will normally be a variable when tuning, so use the cached
4238    result.  This helps mpn_sb_divrem_mn for instance.  */
4239 #undef  CACHED_ABOVE_THRESHOLD
4240 #define CACHED_ABOVE_THRESHOLD(cache, thresh)  (cache)
4241 #undef  CACHED_BELOW_THRESHOLD
4242 #define CACHED_BELOW_THRESHOLD(cache, thresh)  (cache)
4243
4244 #endif /* TUNE_PROGRAM_BUILD */
4245
4246 #if defined (__cplusplus)
4247 }
4248 #endif
4249
4250
4251 #ifdef __cplusplus
4252
4253 /* A little helper for a null-terminated __gmp_allocate_func string.
4254    The destructor ensures it's freed even if an exception is thrown.
4255    The len field is needed by the destructor, and can be used by anyone else
4256    to avoid a second strlen pass over the data.
4257
4258    Since our input is a C string, using strlen is correct.  Perhaps it'd be
4259    more C++-ish style to use std::char_traits<char>::length, but char_traits
4260    isn't available in gcc 2.95.4.  */
4261
4262 class gmp_allocated_string {
4263  public:
4264   char *str;
4265   size_t len;
4266   gmp_allocated_string(char *arg)
4267   {
4268     str = arg;
4269     len = std::strlen (str);
4270   }
4271   ~gmp_allocated_string()
4272   {
4273     (*__gmp_free_func) (str, len+1);
4274   }
4275 };
4276
4277 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
4278 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
4279 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
4280 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
4281 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
4282 extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
4283
4284 #endif /* __cplusplus */
4285
4286 #endif /* __GMP_IMPL_H__ */