1 /* Include file for internal GNU MP types and definitions.
3 THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
7 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
9 This file is part of the GNU MP Library.
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 /* __GMP_DECLSPEC must be given on any global data that will be accessed
26 from outside libgmp, meaning from the test or development programs, or
27 from libgmpxx. Failing to do this will result in an incorrect address
28 being used for the accesses. On functions __GMP_DECLSPEC makes calls
29 from outside libgmp more efficient, but they'll still work fine without
30 it. */
33 #ifndef __GMP_IMPL_H__
34 #define __GMP_IMPL_H__
36 #if defined _CRAY
37 #include <intrinsics.h> /* for _popcnt */
38 #endif
40 /* limits.h is not used in general, since it's an ANSI-ism, and since on
41 solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
42 values (the ABI=64 values).
44 On Cray vector systems, however, we need the system limits.h since sizes
45 of signed and unsigned types can differ there, depending on compiler
46 options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail. For
47 reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
48 short can be 24, 32, 46 or 64 bits, and different for ushort. */
50 #if defined _CRAY
51 #include <limits.h>
52 #endif
54 /* For fat.h and other fat binary stuff.
55 No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
56 declared this way are only used to set function pointers in __gmp_cpuvec,
57 they're not called directly. */
58 #define DECL_add_n(name) \
59 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
60 #define DECL_addmul_1(name) \
61 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
62 #define DECL_copyd(name) \
63 __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
64 #define DECL_copyi(name) \
65 DECL_copyd (name)
66 #define DECL_divexact_1(name) \
67 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
68 #define DECL_divexact_by3c(name) \
69 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
70 #define DECL_divrem_1(name) \
71 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
72 #define DECL_gcd_1(name) \
73 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
74 #define DECL_lshift(name) \
75 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
76 #define DECL_mod_1(name) \
77 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
78 #define DECL_mod_34lsub1(name) \
79 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
80 #define DECL_modexact_1c_odd(name) \
81 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
82 #define DECL_mul_1(name) \
83 DECL_addmul_1 (name)
84 #define DECL_mul_basecase(name) \
85 __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
86 #define DECL_preinv_divrem_1(name) \
87 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
88 #define DECL_preinv_mod_1(name) \
89 __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
90 #define DECL_rshift(name) \
91 DECL_lshift (name)
92 #define DECL_sqr_basecase(name) \
93 __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
94 #define DECL_sub_n(name) \
95 DECL_add_n (name)
96 #define DECL_submul_1(name) \
97 DECL_addmul_1 (name)
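/* Illustrative sketch only (not part of this header): how a DECL_ macro is
   typically used.  The CPU-specific name below is hypothetical; real fat
   binaries get these declarations from fat.h and store the addresses in
   __gmp_cpuvec rather than calling the functions directly.  */
#if 0
DECL_add_n (__MPN(add_n_k8));   /* declares mp_limb_t __gmpn_add_n_k8 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t) */
/* ... later, e.g. in cpu detection code:  __gmp_cpuvec.add_n = __MPN(add_n_k8);  */
#endif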
99 #if ! __GMP_WITHIN_CONFIGURE
100 #include "config.h"
101 #include "gmp-mparam.h"
102 #include "fib_table.h"
103 #include "mp_bases.h"
104 #if WANT_FAT_BINARY
105 #include "fat.h"
106 #endif
107 #endif
109 #if HAVE_INTTYPES_H /* for uint_least32_t */
110 # include <inttypes.h>
111 #else
112 # if HAVE_STDINT_H
113 # include <stdint.h>
114 # endif
115 #endif
117 #ifdef __cplusplus
118 #include <cstring> /* for strlen */
119 #include <string> /* for std::string */
120 #endif
123 #ifndef WANT_TMP_DEBUG /* for TMP_ALLOC_LIMBS_2 and others */
124 #define WANT_TMP_DEBUG 0
125 #endif
127 /* The following tries to get a good version of alloca. The tests are
128 adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
129 Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
130 be setup appropriately.
132 ifndef alloca - a cpp define might already exist.
133 glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
134 HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
136 GCC __builtin_alloca - preferred whenever available.
138 _AIX pragma - IBM compilers need a #pragma in "each module that needs to
139 use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
140 used in past versions of GMP, retained still in case it matters.
142 The autoconf manual says this pragma needs to be at the start of a C
143 file, apart from comments and preprocessor directives. Is that true?
144 xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
145 from gmp.h. */
148 #ifndef alloca
149 # ifdef __GNUC__
150 # define alloca __builtin_alloca
151 # else
152 # ifdef __DECC
153 # define alloca(x) __ALLOCA(x)
154 # else
155 # ifdef _MSC_VER
156 # include <malloc.h>
157 # define alloca _alloca
158 # else
159 # if HAVE_ALLOCA_H
160 # include <alloca.h>
161 # else
162 # if defined (_AIX) || defined (_IBMR2)
163 #pragma alloca
164 # else
165 char *alloca ();
166 # endif
167 # endif
168 # endif
169 # endif
170 # endif
171 #endif
174 /* if not provided by gmp-mparam.h */
175 #ifndef BYTES_PER_MP_LIMB
176 #define BYTES_PER_MP_LIMB SIZEOF_MP_LIMB_T
177 #endif
178 #define GMP_LIMB_BYTES BYTES_PER_MP_LIMB
179 #ifndef GMP_LIMB_BITS
180 #define GMP_LIMB_BITS (8 * SIZEOF_MP_LIMB_T)
181 #endif
183 #define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)
186 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
187 #if HAVE_UINT_LEAST32_T
188 typedef uint_least32_t gmp_uint_least32_t;
189 #else
190 #if SIZEOF_UNSIGNED_SHORT >= 4
191 typedef unsigned short gmp_uint_least32_t;
192 #else
193 #if SIZEOF_UNSIGNED >= 4
194 typedef unsigned gmp_uint_least32_t;
195 #else
196 typedef unsigned long gmp_uint_least32_t;
197 #endif
198 #endif
199 #endif
202 /* gmp_intptr_t, for pointer to integer casts */
203 #if HAVE_INTPTR_T
204 typedef intptr_t gmp_intptr_t;
205 #else /* fallback */
206 typedef size_t gmp_intptr_t;
207 #endif
210 /* pre-inverse types for truncating division and modulo */
211 typedef struct {mp_limb_t inv32;} gmp_pi1_t;
212 typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
215 /* const and signed must match __gmp_const and __gmp_signed, so follow the
216 decision made for those in gmp.h. */
217 #if ! __GMP_HAVE_CONST
218 #define const /* empty */
219 #define signed /* empty */
220 #endif
222 /* "const" basically means a function does nothing but examine its arguments
223 and give a return value, it doesn't read or write any memory (neither
224 global nor pointed to by arguments), and has no other side-effects. This
225 is more restrictive than "pure". See info node "(gcc)Function
226 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
227 this off when trying to write timing loops. */
228 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
229 #define ATTRIBUTE_CONST __attribute__ ((const))
230 #else
231 #define ATTRIBUTE_CONST
232 #endif
234 #if HAVE_ATTRIBUTE_NORETURN
235 #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
236 #else
237 #define ATTRIBUTE_NORETURN
238 #endif
240 /* "malloc" means a function behaves like malloc in that the pointer it
241 returns doesn't alias anything. */
242 #if HAVE_ATTRIBUTE_MALLOC
243 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
244 #else
245 #define ATTRIBUTE_MALLOC
246 #endif
249 #if ! HAVE_STRCHR
250 #define strchr(s,c) index(s,c)
251 #endif
253 #if ! HAVE_MEMSET
254 #define memset(p, c, n) \
255 do { \
256 ASSERT ((n) >= 0); \
257 char *__memset__p = (p); \
258 int __i; \
259 for (__i = 0; __i < (n); __i++) \
260 __memset__p[__i] = (c); \
261 } while (0)
262 #endif
264 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
265 mode. Falling back to a memcpy will give maximum portability, since it
266 works no matter whether va_list is a pointer, struct or array. */
267 #if ! defined (va_copy) && defined (__va_copy)
268 #define va_copy(dst,src) __va_copy(dst,src)
269 #endif
270 #if ! defined (va_copy)
271 #define va_copy(dst,src) \
272 do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
273 #endif
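/* Illustrative sketch: the usual pattern when a va_list has to be traversed
   more than once, whichever definition of va_copy ends up in effect.  The
   surrounding varargs function is assumed, not shown.  */
#if 0
  va_list  copy;
  va_copy (copy, ap);
  /* ... consume "copy" ... */
  va_end (copy);
#endif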
276 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
277 (ie. ctlz, ctpop, cttz). */
278 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
279 || HAVE_HOST_CPU_alphaev7
280 #define HAVE_HOST_CPU_alpha_CIX 1
281 #endif
284 #if defined (__cplusplus)
285 extern "C" {
286 #endif
289 /* Usage: TMP_DECL;
290 TMP_MARK;
291 ptr = TMP_ALLOC (bytes);
292 TMP_FREE;
294 Small allocations should use TMP_SALLOC, big allocations should use
295 TMP_BALLOC. Allocations that might be small or big should use TMP_ALLOC.
297 Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
298 TMP_SFREE.
300 TMP_DECL just declares a variable, but might be empty and so must be last
301 in a list of variables. TMP_MARK must be done before any TMP_ALLOC.
302 TMP_ALLOC(0) is not allowed. TMP_FREE doesn't need to be done if a
303 TMP_MARK was made, but then no TMP_ALLOCs. */
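/* A minimal sketch (not part of GMP) of the shape described above; the
   function name and sizes are made up.  TMP_DECL comes last among the
   declarations since it may expand to nothing.  */
#if 0
static void
example_fn (mp_srcptr ap, mp_size_t n)
{
  mp_ptr  scratch;
  TMP_DECL;
  TMP_MARK;                            /* before any TMP_ALLOC */
  scratch = TMP_ALLOC_LIMBS (2 * n);   /* n > 0, since TMP_ALLOC(0) is not allowed */
  /* ... work in scratch ... */
  TMP_FREE;
}
#endif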
305 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
306 __gmp_allocate_func doesn't already determine it. Currently TMP_ALLOC
307 isn't used for "double"s, so that's not in the union. */
308 union tmp_align_t {
309 mp_limb_t l;
310 char *p;
311 };
312 #define __TMP_ALIGN sizeof (union tmp_align_t)
314 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
315 "a" must be an unsigned type.
316 This is designed for use with a compile-time constant "m".
317 The POW2 case is expected to be usual, and gcc 3.0 and up recognises
318 "(-(8*n))%8" or the like is always zero, which means the rounding up in
319 the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
320 #define ROUND_UP_MULTIPLE(a,m) \
321 (POW2_P(m) ? (a) + (-(a))%(m) \
322 : (a)+(m)-1 - (((a)+(m)-1) % (m)))
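/* Worked example of the macro above: ROUND_UP_MULTIPLE(13,8) takes the POW2
   branch, 13 + (-13)%8 = 13 + 3 = 16 (the negation wrapping since "a" is
   unsigned); ROUND_UP_MULTIPLE(13,6) takes the general branch,
   13+6-1 - ((13+6-1) % 6) = 18 - 0 = 18.  */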
324 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
325 struct tmp_reentrant_t {
326 struct tmp_reentrant_t *next;
327 size_t size; /* bytes, including header */
328 };
329 __GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
330 __GMP_DECLSPEC void __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
331 #endif
333 #if WANT_TMP_ALLOCA
334 #define TMP_SDECL
335 #define TMP_DECL struct tmp_reentrant_t *__tmp_marker
336 #define TMP_SMARK
337 #define TMP_MARK __tmp_marker = 0
338 #define TMP_SALLOC(n) alloca(n)
339 #define TMP_BALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
340 #define TMP_ALLOC(n) \
341 (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
342 #define TMP_SFREE
343 #define TMP_FREE \
344 do { \
345 if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
346 } while (0)
347 #endif
349 #if WANT_TMP_REENTRANT
350 #define TMP_SDECL TMP_DECL
351 #define TMP_DECL struct tmp_reentrant_t *__tmp_marker
352 #define TMP_SMARK TMP_MARK
353 #define TMP_MARK __tmp_marker = 0
354 #define TMP_SALLOC(n) TMP_ALLOC(n)
355 #define TMP_BALLOC(n) TMP_ALLOC(n)
356 #define TMP_ALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
357 #define TMP_SFREE TMP_FREE
358 #define TMP_FREE __gmp_tmp_reentrant_free (__tmp_marker)
359 #endif
361 #if WANT_TMP_NOTREENTRANT
362 struct tmp_marker
363 {
364 struct tmp_stack *which_chunk;
365 void *alloc_point;
366 };
367 __GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
368 __GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
369 __GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
370 #define TMP_SDECL TMP_DECL
371 #define TMP_DECL struct tmp_marker __tmp_marker
372 #define TMP_SMARK TMP_MARK
373 #define TMP_MARK __gmp_tmp_mark (&__tmp_marker)
374 #define TMP_SALLOC(n) TMP_ALLOC(n)
375 #define TMP_BALLOC(n) TMP_ALLOC(n)
376 #define TMP_ALLOC(n) \
377 __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
378 #define TMP_SFREE TMP_FREE
379 #define TMP_FREE __gmp_tmp_free (&__tmp_marker)
380 #endif
382 #if WANT_TMP_DEBUG
383 /* See tal-debug.c for some comments. */
384 struct tmp_debug_t {
385 struct tmp_debug_entry_t *list;
386 const char *file;
387 int line;
388 };
389 struct tmp_debug_entry_t {
390 struct tmp_debug_entry_t *next;
391 char *block;
392 size_t size;
393 };
394 __GMP_DECLSPEC void __gmp_tmp_debug_mark __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
395 struct tmp_debug_t *,
396 const char *, const char *));
397 __GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
398 struct tmp_debug_t **, const char *,
399 size_t)) ATTRIBUTE_MALLOC;
400 __GMP_DECLSPEC void __gmp_tmp_debug_free __GMP_PROTO ((const char *, int, int,
401 struct tmp_debug_t **,
402 const char *, const char *));
403 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
404 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
405 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
406 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
407 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
408 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
409 /* The marker variable is designed to provoke an uninitialized variable
410 warning from the compiler if TMP_FREE is used without a TMP_MARK.
411 __tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
412 these things up too. */
413 #define TMP_DECL_NAME(marker, marker_name) \
414 int marker; \
415 int __tmp_marker_inscope; \
416 const char *__tmp_marker_name = marker_name; \
417 struct tmp_debug_t __tmp_marker_struct; \
418 /* don't demand NULL, just cast a zero */ \
419 struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
420 #define TMP_MARK_NAME(marker, marker_name) \
421 do { \
422 marker = 1; \
423 __tmp_marker_inscope = 1; \
424 __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
425 &__tmp_marker, &__tmp_marker_struct, \
426 __tmp_marker_name, marker_name); \
427 } while (0)
428 #define TMP_SALLOC(n) TMP_ALLOC(n)
429 #define TMP_BALLOC(n) TMP_ALLOC(n)
430 #define TMP_ALLOC(size) \
431 __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
432 __tmp_marker_inscope, \
433 &__tmp_marker, __tmp_marker_name, size)
434 #define TMP_FREE_NAME(marker, marker_name) \
435 do { \
436 __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
437 marker, &__tmp_marker, \
438 __tmp_marker_name, marker_name); \
439 } while (0)
440 #endif /* WANT_TMP_DEBUG */
443 /* Allocating various types. */
444 #define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
445 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
446 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
447 #define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
448 #define TMP_SALLOC_LIMBS(n) TMP_SALLOC_TYPE(n,mp_limb_t)
449 #define TMP_BALLOC_LIMBS(n) TMP_BALLOC_TYPE(n,mp_limb_t)
450 #define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
451 #define TMP_SALLOC_MP_PTRS(n) TMP_SALLOC_TYPE(n,mp_ptr)
452 #define TMP_BALLOC_MP_PTRS(n) TMP_BALLOC_TYPE(n,mp_ptr)
454 /* It's more efficient to allocate one block than two. This is certainly
455 true of the malloc methods, but it can even be true of alloca if that
456 involves copying a chunk of stack (various RISCs), or a call to a stack
457 bounds check (mingw). In any case, when debugging keep separate blocks
458 so a redzoning malloc debugger can protect each individually. */
459 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
460 do { \
461 if (WANT_TMP_DEBUG) \
462 { \
463 (xp) = TMP_ALLOC_LIMBS (xsize); \
464 (yp) = TMP_ALLOC_LIMBS (ysize); \
465 } \
466 else \
467 { \
468 (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
469 (yp) = (xp) + (xsize); \
470 } \
471 } while (0)
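/* Example use, with hypothetical sizes: TMP_ALLOC_LIMBS_2 (xp, n, yp, 2*n+1)
   gives two scratch areas, drawn from one underlying block except when
   WANT_TMP_DEBUG keeps them separate as described above.  */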
474 /* From gmp.h, nicer names for internal use. */
475 #define CRAY_Pragma(str) __GMP_CRAY_Pragma(str)
476 #define MPN_CMP(result, xp, yp, size) __GMPN_CMP(result, xp, yp, size)
477 #define LIKELY(cond) __GMP_LIKELY(cond)
478 #define UNLIKELY(cond) __GMP_UNLIKELY(cond)
480 #define ABS(x) ((x) >= 0 ? (x) : -(x))
481 #undef MIN
482 #define MIN(l,o) ((l) < (o) ? (l) : (o))
483 #undef MAX
484 #define MAX(h,i) ((h) > (i) ? (h) : (i))
485 #define numberof(x) (sizeof (x) / sizeof ((x)[0]))
487 /* Field access macros. */
488 #define SIZ(x) ((x)->_mp_size)
489 #define ABSIZ(x) ABS (SIZ (x))
490 #define PTR(x) ((x)->_mp_d)
491 #define LIMBS(x) ((x)->_mp_d)
492 #define EXP(x) ((x)->_mp_exp)
493 #define PREC(x) ((x)->_mp_prec)
494 #define ALLOC(x) ((x)->_mp_alloc)
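/* Example: for an mpz_t holding a negative value, SIZ gives the negated limb
   count, so ABSIZ is the number of limbs in use, PTR points at the least
   significant limb, and ALLOC is the number of limbs currently allocated.  */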
496 /* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
497 then that lowest one bit must have been the only bit set. n==0 will
498 return true though, so avoid that. */
499 #define POW2_P(n) (((n) & ((n) - 1)) == 0)
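/* Example: 8 is 0b1000 and 8&7 is 0, so POW2_P(8) is 1; 12 is 0b1100 and
   12&11 is 0b1000, nonzero, so POW2_P(12) is 0.  As noted above, POW2_P(0)
   also comes out 1.  */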
502 /* The "short" defines are a bit different because shorts are promoted to
503 ints by ~ or >> etc.
505 #ifndef's are used since on some systems (HP?) header files other than
506 limits.h setup these defines. We could forcibly #undef in that case, but
507 there seems no need to worry about that. */
509 #ifndef ULONG_MAX
510 #define ULONG_MAX __GMP_ULONG_MAX
511 #endif
512 #ifndef UINT_MAX
513 #define UINT_MAX __GMP_UINT_MAX
514 #endif
515 #ifndef USHRT_MAX
516 #define USHRT_MAX __GMP_USHRT_MAX
517 #endif
518 #define MP_LIMB_T_MAX (~ (mp_limb_t) 0)
520 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
521 unsigned on a K&R compiler. In particular the HP-UX 10 bundled K&R cc
522 treats the plain decimal values in <limits.h> as signed. */
523 #define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
524 #define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
525 #define USHRT_HIGHBIT ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
526 #define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
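/* Example, with a 32-bit unsigned long: ULONG_MAX is 0xffffffff and
   (unsigned long) ULONG_MAX >> 1 is 0x7fffffff, so ULONG_HIGHBIT is
   0x80000000, just the top bit.  The other HIGHBIT forms follow the same
   pattern for their types.  */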
528 #ifndef LONG_MIN
529 #define LONG_MIN ((long) ULONG_HIGHBIT)
530 #endif
531 #ifndef LONG_MAX
532 #define LONG_MAX (-(LONG_MIN+1))
533 #endif
535 #ifndef INT_MIN
536 #define INT_MIN ((int) UINT_HIGHBIT)
537 #endif
538 #ifndef INT_MAX
539 #define INT_MAX (-(INT_MIN+1))
540 #endif
542 #ifndef SHRT_MIN
543 #define SHRT_MIN ((short) USHRT_HIGHBIT)
544 #endif
545 #ifndef SHRT_MAX
546 #define SHRT_MAX ((short) (-(SHRT_MIN+1)))
547 #endif
549 #if __GMP_MP_SIZE_T_INT
550 #define MP_SIZE_T_MAX INT_MAX
551 #define MP_SIZE_T_MIN INT_MIN
552 #else
553 #define MP_SIZE_T_MAX LONG_MAX
554 #define MP_SIZE_T_MIN LONG_MIN
555 #endif
557 /* mp_exp_t is the same as mp_size_t */
558 #define MP_EXP_T_MAX MP_SIZE_T_MAX
559 #define MP_EXP_T_MIN MP_SIZE_T_MIN
561 #define LONG_HIGHBIT LONG_MIN
562 #define INT_HIGHBIT INT_MIN
563 #define SHRT_HIGHBIT SHRT_MIN
566 #define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
568 #if GMP_NAIL_BITS == 0
569 #define GMP_NAIL_LOWBIT CNST_LIMB(0)
570 #else
571 #define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
572 #endif
574 #if GMP_NAIL_BITS != 0
575 /* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
576 code that has not yet been qualified. */
578 #undef DC_DIV_QR_THRESHOLD
579 #define DC_DIV_QR_THRESHOLD 50
581 #undef DIVREM_1_NORM_THRESHOLD
582 #undef DIVREM_1_UNNORM_THRESHOLD
583 #undef MOD_1_NORM_THRESHOLD
584 #undef MOD_1_UNNORM_THRESHOLD
585 #undef USE_PREINV_DIVREM_1
586 #undef DIVREM_2_THRESHOLD
587 #undef DIVEXACT_1_THRESHOLD
588 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
589 #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
590 #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
591 #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
592 #define USE_PREINV_DIVREM_1 0 /* no preinv */
593 #define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */
595 /* mpn/generic/mul_fft.c is not nails-capable. */
596 #undef MUL_FFT_THRESHOLD
597 #undef SQR_FFT_THRESHOLD
598 #define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
599 #define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
600 #endif
602 /* Swap macros. */
604 #define MP_LIMB_T_SWAP(x, y) \
605 do { \
606 mp_limb_t __mp_limb_t_swap__tmp = (x); \
607 (x) = (y); \
608 (y) = __mp_limb_t_swap__tmp; \
609 } while (0)
610 #define MP_SIZE_T_SWAP(x, y) \
611 do { \
612 mp_size_t __mp_size_t_swap__tmp = (x); \
613 (x) = (y); \
614 (y) = __mp_size_t_swap__tmp; \
615 } while (0)
617 #define MP_PTR_SWAP(x, y) \
618 do { \
619 mp_ptr __mp_ptr_swap__tmp = (x); \
620 (x) = (y); \
621 (y) = __mp_ptr_swap__tmp; \
622 } while (0)
623 #define MP_SRCPTR_SWAP(x, y) \
624 do { \
625 mp_srcptr __mp_srcptr_swap__tmp = (x); \
626 (x) = (y); \
627 (y) = __mp_srcptr_swap__tmp; \
628 } while (0)
630 #define MPN_PTR_SWAP(xp,xs, yp,ys) \
631 do { \
632 MP_PTR_SWAP (xp, yp); \
633 MP_SIZE_T_SWAP (xs, ys); \
634 } while(0)
635 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
636 do { \
637 MP_SRCPTR_SWAP (xp, yp); \
638 MP_SIZE_T_SWAP (xs, ys); \
639 } while(0)
641 #define MPZ_PTR_SWAP(x, y) \
642 do { \
643 mpz_ptr __mpz_ptr_swap__tmp = (x); \
644 (x) = (y); \
645 (y) = __mpz_ptr_swap__tmp; \
646 } while (0)
647 #define MPZ_SRCPTR_SWAP(x, y) \
648 do { \
649 mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
650 (x) = (y); \
651 (y) = __mpz_srcptr_swap__tmp; \
652 } while (0)
655 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
656 but current gcc (3.0) doesn't seem to support that. */
657 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
658 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
659 __GMP_DECLSPEC extern void (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
661 __GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
662 __GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
663 __GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
665 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
666 ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
667 #define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
669 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
670 ((type *) (*__gmp_reallocate_func) \
671 (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
672 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
673 __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
675 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
676 #define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
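/* Illustrative sketch (not a GMP function): a limb buffer obtained and
   released through the user-installable hooks above; the size passed when
   freeing must match the allocation.  */
#if 0
  mp_ptr  tp = __GMP_ALLOCATE_FUNC_LIMBS (n);
  /* ... */
  __GMP_FREE_FUNC_LIMBS (tp, n);
#endif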
678 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
679 do { \
680 if ((oldsize) != (newsize)) \
681 (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
682 } while (0)
684 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
685 do { \
686 if ((oldsize) != (newsize)) \
687 (ptr) = (type *) (*__gmp_reallocate_func) \
688 (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
689 } while (0)
692 /* Dummy for non-gcc, code involving it will go dead. */
693 #if ! defined (__GNUC__) || __GNUC__ < 2
694 #define __builtin_constant_p(x) 0
695 #endif
698 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
699 stack usage is compatible. __attribute__ ((regparm (N))) helps by
700 putting leading parameters in registers, avoiding extra stack.
702 regparm cannot be used with calls going through the PLT, because the
703 binding code there may clobber the registers (%eax, %edx, %ecx) used for
704 the regparm parameters. Calls to local (ie. static) functions could
705 still use this, if we cared to differentiate locals and globals.
707 On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
708 -p or -pg profiling, since that version of gcc doesn't realize the
709 .mcount calls will clobber the parameter registers. Other systems are
710 ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
711 bother to try to detect this. regparm is only an optimization so we just
712 disable it when profiling (profiling being a slowdown anyway). */
714 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
715 && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
716 #define USE_LEADING_REGPARM 1
717 #else
718 #define USE_LEADING_REGPARM 0
719 #endif
721 /* Macros for altering parameter order according to regparm usage. */
722 #if USE_LEADING_REGPARM
723 #define REGPARM_2_1(a,b,x) x,a,b
724 #define REGPARM_3_1(a,b,c,x) x,a,b,c
725 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
726 #else
727 #define REGPARM_2_1(a,b,x) a,b,x
728 #define REGPARM_3_1(a,b,c,x) a,b,c,x
729 #define REGPARM_ATTR(n)
730 #endif
733 /* ASM_L gives a local label for a gcc asm block, for use when temporary
734 local labels like "1:" might not be available, which is the case for
735 instance on the x86s (the SCO assembler doesn't support them).
737 The label generated is made unique by including "%=" which is a unique
738 number for each insn. This ensures the same name can be used in multiple
739 asm blocks, perhaps via a macro. Since jumps between asm blocks are not
740 allowed there's no need for a label to be usable outside a single
741 block. */
743 #define ASM_L(name) LSYM_PREFIX "asm_%=_" #name
746 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
747 #if 0
748 /* FIXME: Check that these actually improve things.
749 FIXME: Need a cld after each std.
750 FIXME: Can't have inputs in clobbered registers, must describe them as
751 dummy outputs, and add volatile. */
752 #define MPN_COPY_INCR(DST, SRC, N) \
753 __asm__ ("cld\n\trep\n\tmovsl" : : \
754 "D" (DST), "S" (SRC), "c" (N) : \
755 "cx", "di", "si", "memory")
756 #define MPN_COPY_DECR(DST, SRC, N) \
757 __asm__ ("std\n\trep\n\tmovsl" : : \
758 "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
759 "cx", "di", "si", "memory")
760 #endif
761 #endif
764 __GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
765 #define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
767 #define mpz_n_pow_ui __gmpz_n_pow_ui
768 __GMP_DECLSPEC void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
771 #define mpn_addmul_1c __MPN(addmul_1c)
772 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
774 #define mpn_addmul_2 __MPN(addmul_2)
775 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
777 #define mpn_addmul_3 __MPN(addmul_3)
778 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
780 #define mpn_addmul_4 __MPN(addmul_4)
781 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
783 #define mpn_addmul_5 __MPN(addmul_5)
784 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
786 #define mpn_addmul_6 __MPN(addmul_6)
787 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
789 #define mpn_addmul_7 __MPN(addmul_7)
790 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
792 #define mpn_addmul_8 __MPN(addmul_8)
793 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
795 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
796 returns the carry out (0, 1 or 2). */
797 #define mpn_addlsh1_n __MPN(addlsh1_n)
798 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
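/* Worked example for mpn_addlsh1_n, assuming 64-bit limbs and no nails: with
   n==1, a[0]==5 and b[0]==2^63, the sum a + 2*b is 2^64 + 5, so c[0] becomes 5
   and the returned carry is 1; with both operands all-ones the carry reaches
   2, hence the 0..2 range quoted above.  */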
800 /* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
801 returns the carry out (0, ..., 4). */
802 #define mpn_addlsh2_n __MPN(addlsh2_n)
803 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
805 /* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
806 returns the carry out (0, ..., 2^k). */
807 #define mpn_addlsh_n __MPN(addlsh_n)
808 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
810 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
811 returns the borrow out (0, 1 or 2). */
812 #define mpn_sublsh1_n __MPN(sublsh1_n)
813 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
815 /* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
816 returns the carry out (-1, 0, 1). */
817 #define mpn_rsblsh1_n __MPN(rsblsh1_n)
818 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
820 /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
821 returns the borrow out (FIXME 0, 1, 2 or 3). */
822 #define mpn_sublsh2_n __MPN(sublsh2_n)
823 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
825 /* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
826 returns the carry out (-1, ..., 3). */
827 #define mpn_rsblsh2_n __MPN(rsblsh2_n)
828 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
830 /* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
831 returns the carry out (-1, 0, ..., 2^k-1). */
832 #define mpn_rsblsh_n __MPN(rsblsh_n)
833 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
835 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
836 and returns the bit rshifted out (0 or 1). */
837 #define mpn_rsh1add_n __MPN(rsh1add_n)
838 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
839 #define mpn_rsh1add_nc __MPN(rsh1add_nc)
840 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
842 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
843 and returns the bit rshifted out (0 or 1). If there's a borrow from the
844 subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
845 complement negative. */
846 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
847 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
848 #define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
849 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
851 #define mpn_lshiftc __MPN(lshiftc)
852 __GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
854 #define mpn_add_n_sub_n __MPN(add_n_sub_n)
855 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
857 #define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
858 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
860 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
861 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
863 #define mpn_divrem_1c __MPN(divrem_1c)
864 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
866 #define mpn_dump __MPN(dump)
867 __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
869 #define mpn_fib2_ui __MPN(fib2_ui)
870 __GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
872 /* Remap names of internal mpn functions. */
873 #define __clz_tab __MPN(clz_tab)
874 #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
876 #define mpn_jacobi_base __MPN(jacobi_base)
877 __GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
879 #define mpn_mod_1c __MPN(mod_1c)
880 __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
882 #define mpn_mul_1c __MPN(mul_1c)
883 __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
885 #define mpn_mul_2 __MPN(mul_2)
886 __GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
888 #define mpn_mul_3 __MPN(mul_3)
889 __GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
891 #define mpn_mul_4 __MPN(mul_4)
892 __GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
894 #ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
895 #define mpn_mul_basecase __MPN(mul_basecase)
896 __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
897 #endif
899 #define mpn_mullo_n __MPN(mullo_n)
900 __GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
902 #define mpn_mullo_basecase __MPN(mullo_basecase)
903 __GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
905 #define mpn_sqr __MPN(sqr)
906 __GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
908 #ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
909 #define mpn_sqr_basecase __MPN(sqr_basecase)
910 __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
911 #endif
913 #define mpn_submul_1c __MPN(submul_1c)
914 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
916 #define mpn_redc_1 __MPN(redc_1)
917 __GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
919 #define mpn_redc_2 __MPN(redc_2)
920 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
921 #define mpn_redc_n __MPN(redc_n)
922 __GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
925 #define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
926 __GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
927 #define mpn_mod_1_1p __MPN(mod_1_1p)
928 __GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;
930 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
931 __GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
932 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
933 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;
935 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
936 __GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
937 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
938 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;
940 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
941 __GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
942 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
943 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
945 #define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
946 __GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
947 #define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
948 __GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
949 #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
950 __GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
951 static inline mp_size_t
952 mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
953 mp_size_t n, itch;
954 n = rn >> 1;
955 itch = rn + 4 +
956 (an > n ? (bn > n ? rn : n) : 0);
957 return itch;
958 }
960 #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
961 __GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
962 #define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
963 __GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
964 static inline mp_size_t
965 mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
966 mp_size_t n, itch;
967 n = rn >> 1;
968 itch = rn + 3 +
969 (an > n ? an : 0);
970 return itch;
971 }
973 typedef __gmp_randstate_struct *gmp_randstate_ptr;
974 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
976 /* Pseudo-random number generator function pointers structure. */
977 typedef struct {
978 void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
979 void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
980 void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
981 void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
982 } gmp_randfnptr_t;
984 /* Macro to obtain a void pointer to the function pointers structure. */
985 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
987 /* Macro to obtain a pointer to the generator's state.
988 When used as an lvalue the rvalue needs to be cast to mp_ptr. */
989 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
991 /* Write a given number of random bits to rp. */
992 #define _gmp_rand(rp, state, bits) \
993 do { \
994 gmp_randstate_ptr __rstate = (state); \
995 (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
996 (__rstate, rp, bits); \
997 } while (0)
999 __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
1002 /* __gmp_rands is the global state for the old-style random functions, and
1003 is also used in the test programs (hence the __GMP_DECLSPEC).
1005 There's no seeding here, so mpz_random etc will generate the same
1006 sequence every time. This is not unlike the C library random functions
1007 if you don't seed them, so perhaps it's acceptable. Digging up a seed
1008 from /dev/random or the like would work on many systems, but might
1009 encourage a false confidence, since it'd be pretty much impossible to do
1010 something that would work reliably everywhere. In any case the new style
1011 functions are recommended to applications which care about randomness, so
1012 the old functions aren't too important. */
1014 __GMP_DECLSPEC extern char __gmp_rands_initialized;
1015 __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
1017 #define RANDS \
1018 ((__gmp_rands_initialized ? 0 \
1019 : (__gmp_rands_initialized = 1, \
1020 __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
1021 __gmp_rands)
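/* A minimal sketch of how RANDS and the _gmp_rand macro above are meant to
   combine (hypothetical caller; rp and nbits assumed).  */
#if 0
  gmp_randstate_ptr  rstate = RANDS;   /* initializes __gmp_rands on first use */
  _gmp_rand (rp, rstate, nbits);       /* fill rp with nbits random bits */
#endif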
1023 /* this is used by the test programs, to free memory */
1024 #define RANDS_CLEAR() \
1025 do { \
1026 if (__gmp_rands_initialized) \
1027 { \
1028 __gmp_rands_initialized = 0; \
1029 gmp_randclear (__gmp_rands); \
1030 } \
1031 } while (0)
1034 /* For a threshold between algorithms A and B, size>=thresh is where B
1035 should be used. Special value MP_SIZE_T_MAX means only ever use A, or
1036 value 0 means only ever use B. The tests for these special values will
1037 be compile-time constants, so the compiler should be able to eliminate
1038 the code for the unwanted algorithm. */
1040 #define ABOVE_THRESHOLD(size,thresh) \
1041 ((thresh) == 0 \
1042 || ((thresh) != MP_SIZE_T_MAX \
1043 && (size) >= (thresh)))
1044 #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
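/* Example: if a tuned MUL_TOOM22_THRESHOLD were 30 (value hypothetical),
   BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD) selects the basecase for n < 30;
   a value of 0 would always pick the larger-size algorithm and MP_SIZE_T_MAX
   never would, with those two tests folding away at compile time.  */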
1046 #define MPN_TOOM22_MUL_MINSIZE 4
1047 #define MPN_TOOM2_SQR_MINSIZE 4
1049 #define MPN_TOOM33_MUL_MINSIZE 17
1050 #define MPN_TOOM3_SQR_MINSIZE 17
1052 #define MPN_TOOM44_MUL_MINSIZE 30
1053 #define MPN_TOOM4_SQR_MINSIZE 30
1055 #define MPN_TOOM6H_MUL_MINSIZE 46
1056 #define MPN_TOOM6_SQR_MINSIZE 46
1058 #define MPN_TOOM8H_MUL_MINSIZE 86
1059 #define MPN_TOOM8_SQR_MINSIZE 86
1061 #define MPN_TOOM32_MUL_MINSIZE 10
1062 #define MPN_TOOM42_MUL_MINSIZE 10
1063 #define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
1064 #define MPN_TOOM53_MUL_MINSIZE 49 /* ??? */
1065 #define MPN_TOOM63_MUL_MINSIZE 49
1067 #define mpn_sqr_diagonal __MPN(sqr_diagonal)
1068 __GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1070 #define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1071 __GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));
1073 enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
1074 #define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
1075 __GMP_DECLSPEC void mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));
1077 enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
1078 #define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1079 __GMP_DECLSPEC void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
1081 #define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
1082 __GMP_DECLSPEC void mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
1084 #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
1085 __GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
1087 #define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
1088 __GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
1090 #define mpn_toom_couple_handling __MPN(toom_couple_handling)
1091 __GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
1093 #define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
1094 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1096 #define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
1097 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1099 #define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
1100 __GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1102 #define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
1103 __GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1105 #define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
1106 __GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
1108 #define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
1109 __GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
1111 #define mpn_toom22_mul __MPN(toom22_mul)
1112 __GMP_DECLSPEC void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1114 #define mpn_toom32_mul __MPN(toom32_mul)
1115 __GMP_DECLSPEC void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1117 #define mpn_toom42_mul __MPN(toom42_mul)
1118 __GMP_DECLSPEC void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1120 #define mpn_toom52_mul __MPN(toom52_mul)
1121 __GMP_DECLSPEC void mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1123 #define mpn_toom62_mul __MPN(toom62_mul)
1124 __GMP_DECLSPEC void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1126 #define mpn_toom2_sqr __MPN(toom2_sqr)
1127 __GMP_DECLSPEC void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1129 #define mpn_toom33_mul __MPN(toom33_mul)
1130 __GMP_DECLSPEC void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1132 #define mpn_toom43_mul __MPN(toom43_mul)
1133 __GMP_DECLSPEC void mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1135 #define mpn_toom53_mul __MPN(toom53_mul)
1136 __GMP_DECLSPEC void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1138 #define mpn_toom63_mul __MPN(toom63_mul)
1139 __GMP_DECLSPEC void mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1141 #define mpn_toom3_sqr __MPN(toom3_sqr)
1142 __GMP_DECLSPEC void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1144 #define mpn_toom44_mul __MPN(toom44_mul)
1145 __GMP_DECLSPEC void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1147 #define mpn_toom4_sqr __MPN(toom4_sqr)
1148 __GMP_DECLSPEC void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1150 #define mpn_toom6h_mul __MPN(toom6h_mul)
1151 __GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1153 #define mpn_toom6_sqr __MPN(toom6_sqr)
1154 __GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1156 #define mpn_toom8h_mul __MPN(toom8h_mul)
1157 __GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1159 #define mpn_toom8_sqr __MPN(toom8_sqr)
1160 __GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1162 #define mpn_fft_best_k __MPN(fft_best_k)
1163 __GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1165 #define mpn_mul_fft __MPN(mul_fft)
1166 __GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
1168 #define mpn_mul_fft_full __MPN(mul_fft_full)
1169 __GMP_DECLSPEC void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1171 #define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
1172 __GMP_DECLSPEC void mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1174 #define mpn_fft_next_size __MPN(fft_next_size)
1175 __GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
1177 #define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
1178 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1180 #define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
1181 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1183 #define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
1184 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1186 #define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
1187 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
1188 #define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
1189 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
1191 #define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
1192 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
1194 #define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
1195 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
1196 #define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
1197 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
1199 #define mpn_mu_div_qr __MPN(mu_div_qr)
1200 __GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1201 #define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1202 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1203 #define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
1204 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1206 #define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1207 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1208 #define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
1209 __GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
1211 #define mpn_mu_divappr_q __MPN(mu_divappr_q)
1212 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1213 #define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1214 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1215 #define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
1216 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
1218 #define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
1219 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1221 #define mpn_mu_div_q __MPN(mu_div_q)
1222 __GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1223 #define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
1224 __GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
1226 #define mpn_div_q __MPN(div_q)
1227 __GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1229 #define mpn_invert __MPN(invert)
1230 __GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1231 #define mpn_invert_itch(n) mpn_invertappr_itch(n)
1233 #define mpn_ni_invertappr __MPN(ni_invertappr)
1234 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1235 #define mpn_invertappr __MPN(invertappr)
1236 __GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1237 #define mpn_invertappr_itch(n) (3 * (n) + 2)
1239 #define mpn_binvert __MPN(binvert)
1240 __GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1241 #define mpn_binvert_itch __MPN(binvert_itch)
1242 __GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
1244 #define mpn_bdiv_q_1 __MPN(bdiv_q_1)
1245 __GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1247 #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
1248 __GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
1250 #define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
1251 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1253 #define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
1254 __GMP_DECLSPEC void mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1256 #define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
1257 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1258 #define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
1259 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
1261 #define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
1262 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1263 #define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
1264 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
1266 #define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
1267 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
1268 #define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
1269 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
1271 #define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1272 __GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1273 #define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1274 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1276 #define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1277 __GMP_DECLSPEC void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1278 #define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1279 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1281 #define mpn_bdiv_qr __MPN(bdiv_qr)
1282 __GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1283 #define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
1284 __GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1286 #define mpn_bdiv_q __MPN(bdiv_q)
1287 __GMP_DECLSPEC void mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1288 #define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
1289 __GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1291 #define mpn_divexact __MPN(divexact)
1292 __GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
1293 #define mpn_divexact_itch __MPN(divexact_itch)
1294 __GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
1296 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1297 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
1298 #define mpn_bdiv_dbm1(dst, src, size, divisor) \
1299 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1301 #define mpn_powm __MPN(powm)
1302 __GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1303 #define mpn_powlo __MPN(powlo)
1304 __GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
1305 #define mpn_powm_sec __MPN(powm_sec)
1306 __GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
1307 #define mpn_powm_sec_itch __MPN(powm_sec_itch)
1308 __GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
1309 #define mpn_subcnd_n __MPN(subcnd_n)
1310 __GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
1311 #define mpn_tabselect __MPN(tabselect)
1312 __GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
1313 #define mpn_redc_1_sec __MPN(redc_1_sec)
1314 __GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1316 #ifndef DIVEXACT_BY3_METHOD
1317 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1318 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */
1319 #else
1320 #define DIVEXACT_BY3_METHOD 1
1321 #endif
1322 #endif
1324 #if DIVEXACT_BY3_METHOD == 0
1325 #undef mpn_divexact_by3
1326 #define mpn_divexact_by3(dst,src,size) \
1327 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1328 /* override mpn_divexact_by3c defined in gmp.h */
1330 #undef mpn_divexact_by3c
1331 #define mpn_divexact_by3c(dst,src,size,cy) \
1332 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy))
1334 #endif
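/* Illustrative usage sketch (not a library interface, names are made up):
   dividing {sp,n} exactly by 3 into {dp,n} with the macro above.  The
   return value is expected to be zero when the division really is exact.

       mp_limb_t  ret;
       ret = mpn_divexact_by3 (dp, sp, n);
       ASSERT (ret == 0);
*/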
1336 #if GMP_NUMB_BITS % 4 == 0
1337 #define mpn_divexact_by5(dst,src,size) \
1338 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1339 #endif
1341 #if GMP_NUMB_BITS % 6 == 0
1342 #define mpn_divexact_by7(dst,src,size) \
1343 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1344 #endif
1346 #if GMP_NUMB_BITS % 6 == 0
1347 #define mpn_divexact_by9(dst,src,size) \
1348 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1349 #endif
1351 #if GMP_NUMB_BITS % 10 == 0
1352 #define mpn_divexact_by11(dst,src,size) \
1353 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1354 #endif
1356 #if GMP_NUMB_BITS % 12 == 0
1357 #define mpn_divexact_by13(dst,src,size) \
1358 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1359 #endif
1361 #if GMP_NUMB_BITS % 4 == 0
1362 #define mpn_divexact_by15(dst,src,size) \
1363 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1364 #endif
1366 #define mpz_divexact_gcd __gmpz_divexact_gcd
1367 __GMP_DECLSPEC void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
1369 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1370 #ifdef _GMP_H_HAVE_FILE
1371 __GMP_DECLSPEC size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
1372 #endif
1374 #define mpn_divisible_p __MPN(divisible_p)
1375 __GMP_DECLSPEC int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
1377 #define mpn_rootrem __MPN(rootrem)
1378 __GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1381 #if defined (_CRAY)
1382 #define MPN_COPY_INCR(dst, src, n) \
1383 do { \
1384 int __i; /* Faster on some Crays with plain int */ \
1385 _Pragma ("_CRI ivdep"); \
1386 for (__i = 0; __i < (n); __i++) \
1387 (dst)[__i] = (src)[__i]; \
1388 } while (0)
1389 #endif
1391 /* used by test programs, hence __GMP_DECLSPEC */
1392 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
1393 #define mpn_copyi __MPN(copyi)
1394 __GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1395 #endif
1397 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1398 #define MPN_COPY_INCR(dst, src, size) \
1399 do { \
1400 ASSERT ((size) >= 0); \
1401 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
1402 mpn_copyi (dst, src, size); \
1403 } while (0)
1404 #endif
1406 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
1407 #if ! defined (MPN_COPY_INCR)
1408 #define MPN_COPY_INCR(dst, src, n) \
1409 do { \
1410 ASSERT ((n) >= 0); \
1411 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
1412 if ((n) != 0) \
1413 { \
1414 mp_size_t __n = (n) - 1; \
1415 mp_ptr __dst = (dst); \
1416 mp_srcptr __src = (src); \
1417 mp_limb_t __x; \
1418 __x = *__src++; \
1419 if (__n != 0) \
1420 { \
1421 do \
1422 { \
1423 *__dst++ = __x; \
1424 __x = *__src++; \
1425 } \
1426 while (--__n); \
1427 } \
1428 *__dst++ = __x; \
1429 } \
1430 } while (0)
1431 #endif
1434 #if defined (_CRAY)
1435 #define MPN_COPY_DECR(dst, src, n) \
1436 do { \
1437 int __i; /* Faster on some Crays with plain int */ \
1438 _Pragma ("_CRI ivdep"); \
1439 for (__i = (n) - 1; __i >= 0; __i--) \
1440 (dst)[__i] = (src)[__i]; \
1441 } while (0)
1442 #endif
1444 /* used by test programs, hence __GMP_DECLSPEC */
1445 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
1446 #define mpn_copyd __MPN(copyd)
1447 __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1448 #endif
1450 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1451 #define MPN_COPY_DECR(dst, src, size) \
1452 do { \
1453 ASSERT ((size) >= 0); \
1454 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
1455 mpn_copyd (dst, src, size); \
1456 } while (0)
1457 #endif
1459 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1460 #if ! defined (MPN_COPY_DECR)
1461 #define MPN_COPY_DECR(dst, src, n) \
1462 do { \
1463 ASSERT ((n) >= 0); \
1464 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1465 if ((n) != 0) \
1466 { \
1467 mp_size_t __n = (n) - 1; \
1468 mp_ptr __dst = (dst) + __n; \
1469 mp_srcptr __src = (src) + __n; \
1470 mp_limb_t __x; \
1471 __x = *__src--; \
1472 if (__n != 0) \
1473 { \
1474 do \
1475 { \
1476 *__dst-- = __x; \
1477 __x = *__src--; \
1478 } \
1479 while (--__n); \
1480 } \
1481 *__dst-- = __x; \
1482 } \
1483 } while (0)
1484 #endif
1487 #ifndef MPN_COPY
1488 #define MPN_COPY(d,s,n) \
1489 do { \
1490 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1491 MPN_COPY_INCR (d, s, n); \
1492 } while (0)
1493 #endif
1496 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1497 #define MPN_REVERSE(dst, src, size) \
1498 do { \
1499 mp_ptr __dst = (dst); \
1500 mp_size_t __size = (size); \
1501 mp_srcptr __src = (src) + __size - 1; \
1502 mp_size_t __i; \
1503 ASSERT ((size) >= 0); \
1504 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1505 CRAY_Pragma ("_CRI ivdep"); \
1506 for (__i = 0; __i < __size; __i++) \
1507 { \
1508 *__dst = *__src; \
1509 __dst++; \
1510 __src--; \
1511 } \
1512 } while (0)
1515 /* Zero n limbs at dst.
1517 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1518 ppc630 for instance this is optimal since it can sustain only 1 store per
1519 cycle.
1521 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1522 "for" loop in the generic code below can become stu/bdnz. The do/while
1523 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1524 applies here as to __GMPN_COPY_INCR in gmp.h.
1526 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1527 this loop too.
1529 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1530 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1531 trouble than it's worth to do the same, though perhaps a call to memset
1532 would be good when on a GNU system. */
1534 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1535 #define MPN_ZERO(dst, n) \
1536 do { \
1537 ASSERT ((n) >= 0); \
1538 if ((n) != 0) \
1539 { \
1540 mp_ptr __dst = (dst) - 1; \
1541 mp_size_t __n = (n); \
1542 do \
1543 *++__dst = 0; \
1544 while (--__n); \
1545 } \
1546 } while (0)
1547 #endif
1549 #ifndef MPN_ZERO
1550 #define MPN_ZERO(dst, n) \
1551 do { \
1552 ASSERT ((n) >= 0); \
1553 if ((n) != 0) \
1554 { \
1555 mp_ptr __dst = (dst); \
1556 mp_size_t __n = (n); \
1557 do \
1558 *__dst++ = 0; \
1559 while (--__n); \
1560 } \
1561 } while (0)
1562 #endif
1565 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1566 start up and would need to strip a lot of zeros before it'd be faster
1567 than a simple cmpl loop. Here are some times in cycles for
1568 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1569 low zeros).
1571 std cld
1572 P5 18 16
1573 P6 46 38
1574 K6 36 13
1575 K7 21 20
1576 */
1577 #ifndef MPN_NORMALIZE
1578 #define MPN_NORMALIZE(DST, NLIMBS) \
1579 do { \
1580 while ((NLIMBS) > 0) \
1581 { \
1582 if ((DST)[(NLIMBS) - 1] != 0) \
1583 break; \
1584 (NLIMBS)--; \
1585 } \
1586 } while (0)
1587 #endif
1588 #ifndef MPN_NORMALIZE_NOT_ZERO
1589 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1590 do { \
1591 ASSERT ((NLIMBS) >= 1); \
1592 while (1) \
1593 { \
1594 if ((DST)[(NLIMBS) - 1] != 0) \
1595 break; \
1596 (NLIMBS)--; \
1597 } \
1598 } while (0)
1599 #endif
1601 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1602 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1603 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1604 somewhere a non-zero limb. */
1605 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1606 do { \
1607 ASSERT ((size) >= 1); \
1608 ASSERT ((low) == (ptr)[0]); \
1610 while ((low) == 0) \
1611 { \
1612 (size)--; \
1613 ASSERT ((size) >= 1); \
1614 (ptr)++; \
1615 (low) = *(ptr); \
1616 } \
1617 } while (0)
1619 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1620 temporary variable; it will be automatically cleared out at function
1621 return. We use __x here to make it possible to accept both mpz_ptr and
1622 mpz_t arguments. */
1623 #define MPZ_TMP_INIT(X, NLIMBS) \
1624 do { \
1625 mpz_ptr __x = (X); \
1626 ASSERT ((NLIMBS) >= 1); \
1627 __x->_mp_alloc = (NLIMBS); \
1628 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1629 } while (0)
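/* A typical use is inside a function body, together with the TMP_DECL /
   TMP_MARK / TMP_FREE temporary-allocation macros defined earlier in this
   file (sketch only, x is an illustrative operand):

       mpz_t  t;
       TMP_DECL;
       TMP_MARK;
       MPZ_TMP_INIT (t, 2 * SIZ (x) + 1);
       ... use t like a normal mpz_t, but never mpz_clear it ...
       TMP_FREE;
*/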
1631 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
1632 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
1633 ? (mp_ptr) _mpz_realloc(z,n) \
1634 : PTR(z))
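/* Usage sketch: make sure w has room for wsize limbs before writing to its
   limb data directly (wp is just an illustrative name for the returned
   pointer):

       mp_ptr  wp = MPZ_REALLOC (w, wsize);
*/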
1636 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
1639 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1640 f1p.
1642 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1643 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
1644 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1646 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1647 without good floating point. There's +2 for rounding up, and a further
1648 +2 since at the last step x limbs are doubled into a 2x+1 limb region
1649 whereas the actual F[2k] value might be only 2x-1 limbs.
1651 Note that a division is done first, since on a 32-bit system it's at
1652 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
1653 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1654 whatever a multiply of two 190Mbyte numbers takes.)
1656 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1657 worked into the multiplier. */
1659 #define MPN_FIB2_SIZE(n) \
1660 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
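/* Worked example of the expression above, assuming GMP_NUMB_BITS == 64 and
   n == 1000: 1000/32 = 31, 31*23 = 713, 713/64 = 11, plus 4 gives 15 limbs.
   F[1000] itself has about 1000*0.694 = 694 bits, i.e. 11 limbs, so the
   estimate leaves the intended few limbs of headroom.  */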
1663 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
1664 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1666 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1667 F[n] + 2*F[n-1] fits in a limb. */
1669 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1670 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
1672 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
1673 typedef struct
1674 {
1675 unsigned long d; /* current index in s[] */
1676 unsigned long s0; /* number corresponding to s[0] */
1677 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
1678 unsigned char s[SIEVESIZE + 1]; /* sieve table */
1679 } gmp_primesieve_t;
1681 #define gmp_init_primesieve __gmp_init_primesieve
1682 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
1684 #define gmp_nextprime __gmp_nextprime
1685 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
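/* Usage sketch (illustrative only): enumerating small primes with the
   sieve, up to some bound "limit".

       gmp_primesieve_t  ps;
       unsigned long     p;
       gmp_init_primesieve (&ps);
       while ((p = gmp_nextprime (&ps)) <= limit)
         ... use p ...
*/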
1688 #ifndef MUL_TOOM22_THRESHOLD
1689 #define MUL_TOOM22_THRESHOLD 30
1690 #endif
1692 #ifndef MUL_TOOM33_THRESHOLD
1693 #define MUL_TOOM33_THRESHOLD 100
1694 #endif
1696 #ifndef MUL_TOOM44_THRESHOLD
1697 #define MUL_TOOM44_THRESHOLD 300
1698 #endif
1700 #ifndef MUL_TOOM6H_THRESHOLD
1701 #define MUL_TOOM6H_THRESHOLD 350
1702 #endif
1704 #ifndef SQR_TOOM6_THRESHOLD
1705 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
1706 #endif
1708 #ifndef MUL_TOOM8H_THRESHOLD
1709 #define MUL_TOOM8H_THRESHOLD 450
1710 #endif
1712 #ifndef SQR_TOOM8_THRESHOLD
1713 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
1714 #endif
1716 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
1717 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
1718 #endif
1720 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
1721 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
1722 #endif
1724 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
1725 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
1726 #endif
1728 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
1729 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
1730 #endif
1732 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
1733 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
1734 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
1735 separate hard limit will have been defined. Similarly for TOOM3. */
1736 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
1737 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
1738 #endif
1739 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
1740 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
1741 #endif
1742 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
1743 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
1744 #endif
1746 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1747 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
1748 certainly always want it if there's a native assembler mpn_sqr_basecase.)
1750 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
1751 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
1752 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
1753 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
1754 should be used, and that may be never. */
1756 #ifndef SQR_BASECASE_THRESHOLD
1757 #define SQR_BASECASE_THRESHOLD 0
1758 #endif
1760 #ifndef SQR_TOOM2_THRESHOLD
1761 #define SQR_TOOM2_THRESHOLD 50
1762 #endif
1764 #ifndef SQR_TOOM3_THRESHOLD
1765 #define SQR_TOOM3_THRESHOLD 120
1766 #endif
1768 #ifndef SQR_TOOM4_THRESHOLD
1769 #define SQR_TOOM4_THRESHOLD 400
1770 #endif
1772 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
1773 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1774 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
1775 #endif
1777 #ifndef DC_DIV_QR_THRESHOLD
1778 #define DC_DIV_QR_THRESHOLD 50
1779 #endif
1781 #ifndef DC_DIVAPPR_Q_THRESHOLD
1782 #define DC_DIVAPPR_Q_THRESHOLD 200
1783 #endif
1785 #ifndef DC_BDIV_QR_THRESHOLD
1786 #define DC_BDIV_QR_THRESHOLD 50
1787 #endif
1789 #ifndef DC_BDIV_Q_THRESHOLD
1790 #define DC_BDIV_Q_THRESHOLD 180
1791 #endif
1793 #ifndef DIVEXACT_JEB_THRESHOLD
1794 #define DIVEXACT_JEB_THRESHOLD 25
1795 #endif
1797 #ifndef INV_MULMOD_BNM1_THRESHOLD
1798 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
1799 #endif
1801 #ifndef INV_APPR_THRESHOLD
1802 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
1803 #endif
1805 #ifndef INV_NEWTON_THRESHOLD
1806 #define INV_NEWTON_THRESHOLD 200
1807 #endif
1809 #ifndef BINV_NEWTON_THRESHOLD
1810 #define BINV_NEWTON_THRESHOLD 300
1811 #endif
1813 #ifndef MU_DIVAPPR_Q_THRESHOLD
1814 #define MU_DIVAPPR_Q_THRESHOLD 2000
1815 #endif
1817 #ifndef MU_DIV_QR_THRESHOLD
1818 #define MU_DIV_QR_THRESHOLD 2000
1819 #endif
1821 #ifndef MUPI_DIV_QR_THRESHOLD
1822 #define MUPI_DIV_QR_THRESHOLD 200
1823 #endif
1825 #ifndef MU_BDIV_Q_THRESHOLD
1826 #define MU_BDIV_Q_THRESHOLD 2000
1827 #endif
1829 #ifndef MU_BDIV_QR_THRESHOLD
1830 #define MU_BDIV_QR_THRESHOLD 2000
1831 #endif
1833 #ifndef MULMOD_BNM1_THRESHOLD
1834 #define MULMOD_BNM1_THRESHOLD 16
1835 #endif
1837 #ifndef SQRMOD_BNM1_THRESHOLD
1838 #define SQRMOD_BNM1_THRESHOLD 16
1839 #endif
1841 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
1842 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
1843 #endif
1845 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
1847 #ifndef REDC_1_TO_REDC_2_THRESHOLD
1848 #define REDC_1_TO_REDC_2_THRESHOLD 15
1849 #endif
1850 #ifndef REDC_2_TO_REDC_N_THRESHOLD
1851 #define REDC_2_TO_REDC_N_THRESHOLD 100
1852 #endif
1854 #else
1856 #ifndef REDC_1_TO_REDC_N_THRESHOLD
1857 #define REDC_1_TO_REDC_N_THRESHOLD 100
1858 #endif
1860 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
1863 /* First k to use for an FFT modF multiply. A modF FFT is an order
1864 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
1865 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
1866 #define FFT_FIRST_K 4
1868 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1869 #ifndef MUL_FFT_MODF_THRESHOLD
1870 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
1871 #endif
1872 #ifndef SQR_FFT_MODF_THRESHOLD
1873 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
1874 #endif
1876 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
1877 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1878 NxN->2N multiply and not recursing into itself is an order
1879 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
1880 is the first better than toom3. */
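/* Put another way, the exponents just mentioned reduce to k/(k-1) for a
   modF transform (k=3 -> 3/2, k=4 -> 4/3) and to k/(k-2) for an NxN->2N
   multiply (k=7 -> 7/5, k=8 -> 4/3).  */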
1881 #ifndef MUL_FFT_THRESHOLD
1882 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
1883 #endif
1884 #ifndef SQR_FFT_THRESHOLD
1885 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
1886 #endif
1888 /* Table of thresholds for successive modF FFT "k"s. The first entry is
1889 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1890 etc. See mpn_fft_best_k(). */
1891 #ifndef MUL_FFT_TABLE
1892 #define MUL_FFT_TABLE \
1893 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
1894 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
1895 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
1896 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
1897 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
1898 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
1899 }
1900 #endif
1901 #ifndef SQR_FFT_TABLE
1902 #define SQR_FFT_TABLE \
1903 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
1904 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
1905 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
1906 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
1907 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
1908 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
1909 }
1910 #endif
1912 struct fft_table_nk
1913 {
1914 unsigned int n:27;
1915 unsigned int k:5;
1916 };
1918 #ifndef FFT_TABLE_ATTRS
1919 #define FFT_TABLE_ATTRS static const
1920 #endif
1922 #define MPN_FFT_TABLE_SIZE 16
1925 #ifndef DC_DIV_QR_THRESHOLD
1926 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
1927 #endif
1929 #ifndef GET_STR_DC_THRESHOLD
1930 #define GET_STR_DC_THRESHOLD 18
1931 #endif
1933 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1934 #define GET_STR_PRECOMPUTE_THRESHOLD 35
1935 #endif
1937 #ifndef SET_STR_DC_THRESHOLD
1938 #define SET_STR_DC_THRESHOLD 750
1939 #endif
1941 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
1942 #define SET_STR_PRECOMPUTE_THRESHOLD 2000
1943 #endif
1945 /* Return non-zero if xp,xsize and yp,ysize overlap.
1946 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1947 overlap. If both these are false, there's an overlap. */
1948 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1949 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1950 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
1951 ( (char *) (xp) + (xsize) > (char *) (yp) \
1952 && (char *) (yp) + (ysize) > (char *) (xp))
1954 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1955 overlapping. Return zero if they're partially overlapping. */
1956 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
1957 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1958 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
1959 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1961 /* Return non-zero if dst,dsize and src,ssize are either identical or
1962 overlapping in a way suitable for an incrementing/decrementing algorithm.
1963 Return zero if they're partially overlapping in an unsuitable fashion. */
1964 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
1965 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1966 #define MPN_SAME_OR_INCR_P(dst, src, size) \
1967 MPN_SAME_OR_INCR2_P(dst, size, src, size)
1968 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
1969 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1970 #define MPN_SAME_OR_DECR_P(dst, src, size) \
1971 MPN_SAME_OR_DECR2_P(dst, size, src, size)
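/* For example an in-place shift down by one limb satisfies the INCR
   condition, since dst < src:

       MPN_COPY_INCR (p, p + 1, n - 1);

   whereas the same regions fail MPN_SAME_OR_DECR_P, so MPN_COPY_DECR must
   not be used for that operation.  */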
1974 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1975 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1976 does it always. Generally assertions are meant for development, but
1977 might help when looking for a problem later too.
1979 Note that strings shouldn't be used within the ASSERT expression,
1980 eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1981 used in the !HAVE_STRINGIZE case (ie. K&R). */
1983 #ifdef __LINE__
1984 #define ASSERT_LINE __LINE__
1985 #else
1986 #define ASSERT_LINE -1
1987 #endif
1989 #ifdef __FILE__
1990 #define ASSERT_FILE __FILE__
1991 #else
1992 #define ASSERT_FILE ""
1993 #endif
1995 __GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
1996 __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
1998 #if HAVE_STRINGIZE
1999 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
2000 #else
2001 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
2002 #endif
2004 #define ASSERT_ALWAYS(expr) \
2005 do { \
2006 if (!(expr)) \
2007 ASSERT_FAIL (expr); \
2008 } while (0)
2010 #if WANT_ASSERT
2011 #define ASSERT(expr) ASSERT_ALWAYS (expr)
2012 #else
2013 #define ASSERT(expr) do {} while (0)
2014 #endif
2017 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2018 that it's zero. In both cases if assertion checking is disabled the
2019 expression is still evaluated. These macros are meant for use with
2020 routines like mpn_add_n() where the return value represents a carry or
2021 whatever that should or shouldn't occur in some context. For example,
2022 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2023 #if WANT_ASSERT
2024 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2025 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2026 #else
2027 #define ASSERT_CARRY(expr) (expr)
2028 #define ASSERT_NOCARRY(expr) (expr)
2029 #endif
2032 /* ASSERT_CODE includes code when assertion checking is wanted. This is the
2033 same as writing "#if WANT_ASSERT", but more compact. */
2034 #if WANT_ASSERT
2035 #define ASSERT_CODE(expr) expr
2036 #else
2037 #define ASSERT_CODE(expr)
2038 #endif
2041 /* Test that an mpq_t is in fully canonical form. This can be used as
2042 protection on routines like mpq_equal which give wrong results on
2043 non-canonical inputs. */
2044 #if WANT_ASSERT
2045 #define ASSERT_MPQ_CANONICAL(q) \
2046 do { \
2047 ASSERT (q->_mp_den._mp_size > 0); \
2048 if (q->_mp_num._mp_size == 0) \
2049 { \
2050 /* zero should be 0/1 */ \
2051 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2052 } \
2053 else \
2054 { \
2055 /* no common factors */ \
2056 mpz_t __g; \
2057 mpz_init (__g); \
2058 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2059 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2060 mpz_clear (__g); \
2061 } \
2062 } while (0)
2063 #else
2064 #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2065 #endif
2067 /* Check that the nail parts are zero. */
2068 #define ASSERT_ALWAYS_LIMB(limb) \
2069 do { \
2070 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2071 ASSERT_ALWAYS (__nail == 0); \
2072 } while (0)
2073 #define ASSERT_ALWAYS_MPN(ptr, size) \
2074 do { \
2075 /* let whole loop go dead when no nails */ \
2076 if (GMP_NAIL_BITS != 0) \
2077 { \
2078 mp_size_t __i; \
2079 for (__i = 0; __i < (size); __i++) \
2080 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2081 } \
2082 } while (0)
2083 #if WANT_ASSERT
2084 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2085 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2086 #else
2087 #define ASSERT_LIMB(limb) do {} while (0)
2088 #define ASSERT_MPN(ptr, size) do {} while (0)
2089 #endif
2092 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
2093 size==0 is allowed, and in that case {ptr,size} considered to be zero. */
2094 #if WANT_ASSERT
2095 #define ASSERT_MPN_ZERO_P(ptr,size) \
2096 do { \
2097 mp_size_t __i; \
2098 ASSERT ((size) >= 0); \
2099 for (__i = 0; __i < (size); __i++) \
2100 ASSERT ((ptr)[__i] == 0); \
2101 } while (0)
2102 #define ASSERT_MPN_NONZERO_P(ptr,size) \
2103 do { \
2104 mp_size_t __i; \
2105 int __nonzero = 0; \
2106 ASSERT ((size) >= 0); \
2107 for (__i = 0; __i < (size); __i++) \
2108 if ((ptr)[__i] != 0) \
2109 { \
2110 __nonzero = 1; \
2111 break; \
2112 } \
2113 ASSERT (__nonzero); \
2114 } while (0)
2115 #else
2116 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2117 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2118 #endif
2121 #if ! HAVE_NATIVE_mpn_com
2122 #undef mpn_com
2123 #define mpn_com(d,s,n) \
2124 do { \
2125 mp_ptr __d = (d); \
2126 mp_srcptr __s = (s); \
2127 mp_size_t __n = (n); \
2128 ASSERT (__n >= 1); \
2129 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2130 do \
2131 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2132 while (--__n); \
2133 } while (0)
2134 #endif
2136 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2137 do { \
2138 mp_srcptr __up = (up); \
2139 mp_srcptr __vp = (vp); \
2140 mp_ptr __rp = (rp); \
2141 mp_size_t __n = (n); \
2142 mp_limb_t __a, __b; \
2143 ASSERT (__n > 0); \
2144 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2145 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2146 __up += __n; \
2147 __vp += __n; \
2148 __rp += __n; \
2149 __n = -__n; \
2150 do { \
2151 __a = __up[__n]; \
2152 __b = __vp[__n]; \
2153 __rp[__n] = operation; \
2154 } while (++__n); \
2155 } while (0)
2158 #if ! HAVE_NATIVE_mpn_and_n
2159 #undef mpn_and_n
2160 #define mpn_and_n(rp, up, vp, n) \
2161 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2162 #endif
2164 #if ! HAVE_NATIVE_mpn_andn_n
2165 #undef mpn_andn_n
2166 #define mpn_andn_n(rp, up, vp, n) \
2167 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2168 #endif
2170 #if ! HAVE_NATIVE_mpn_nand_n
2171 #undef mpn_nand_n
2172 #define mpn_nand_n(rp, up, vp, n) \
2173 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2174 #endif
2176 #if ! HAVE_NATIVE_mpn_ior_n
2177 #undef mpn_ior_n
2178 #define mpn_ior_n(rp, up, vp, n) \
2179 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2180 #endif
2182 #if ! HAVE_NATIVE_mpn_iorn_n
2183 #undef mpn_iorn_n
2184 #define mpn_iorn_n(rp, up, vp, n) \
2185 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2186 #endif
2188 #if ! HAVE_NATIVE_mpn_nior_n
2189 #undef mpn_nior_n
2190 #define mpn_nior_n(rp, up, vp, n) \
2191 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2192 #endif
2194 #if ! HAVE_NATIVE_mpn_xor_n
2195 #undef mpn_xor_n
2196 #define mpn_xor_n(rp, up, vp, n) \
2197 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2198 #endif
2200 #if ! HAVE_NATIVE_mpn_xnor_n
2201 #undef mpn_xnor_n
2202 #define mpn_xnor_n(rp, up, vp, n) \
2203 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2204 #endif
2206 #define mpn_trialdiv __MPN(trialdiv)
2207 __GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
2209 #define mpn_remove __MPN(remove)
2210 __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
2213 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2214 #if GMP_NAIL_BITS == 0
2215 #define ADDC_LIMB(cout, w, x, y) \
2216 do { \
2217 mp_limb_t __x = (x); \
2218 mp_limb_t __y = (y); \
2219 mp_limb_t __w = __x + __y; \
2220 (w) = __w; \
2221 (cout) = __w < __x; \
2222 } while (0)
2223 #else
2224 #define ADDC_LIMB(cout, w, x, y) \
2225 do { \
2226 mp_limb_t __w; \
2227 ASSERT_LIMB (x); \
2228 ASSERT_LIMB (y); \
2229 __w = (x) + (y); \
2230 (w) = __w & GMP_NUMB_MASK; \
2231 (cout) = __w >> GMP_NUMB_BITS; \
2232 } while (0)
2233 #endif
2235 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2236 subtract. */
2237 #if GMP_NAIL_BITS == 0
2238 #define SUBC_LIMB(cout, w, x, y) \
2239 do { \
2240 mp_limb_t __x = (x); \
2241 mp_limb_t __y = (y); \
2242 mp_limb_t __w = __x - __y; \
2243 (w) = __w; \
2244 (cout) = __w > __x; \
2245 } while (0)
2246 #else
2247 #define SUBC_LIMB(cout, w, x, y) \
2248 do { \
2249 mp_limb_t __w = (x) - (y); \
2250 (w) = __w & GMP_NUMB_MASK; \
2251 (cout) = __w >> (GMP_LIMB_BITS-1); \
2252 } while (0)
2253 #endif
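/* Usage sketch: adding a single limb x into a two-limb value {lo,hi}
   (illustrative variable names, and assuming the high part cannot
   overflow):

       mp_limb_t  cy;
       ADDC_LIMB (cy, lo, lo, x);
       hi += cy;
*/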
2256 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2257 expecting no carry (or borrow) from that.
2259 The size parameter is only for the benefit of assertion checking. In a
2260 normal build it's unused and the carry/borrow is just propagated as far
2261 as it needs to go.
2263 On random data, usually only one or two limbs of {ptr,size} get updated,
2264 so there's no need for any sophisticated looping, just something compact
2265 and sensible.
2267 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2268 declaring their operand sizes, then remove the former. This is purely
2269 for the benefit of assertion checking. */
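/* A common pattern is propagating a carry from a low part into the rest of
   the same operand, when it is known the carry cannot run off the end
   (sketch, with illustrative names):

       cy = mpn_add_n (rp, up, vp, n);
       MPN_INCR_U (rp + n, rsize - n, cy);
*/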
2271 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
2272 && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
2273 /* Better flags handling than the generic C gives on i386, saving a few
2274 bytes of code and maybe a cycle or two. */
2276 #define MPN_IORD_U(ptr, incr, aors) \
2277 do { \
2278 mp_ptr __ptr_dummy; \
2279 if (__builtin_constant_p (incr) && (incr) == 1) \
2281 __asm__ __volatile__ \
2282 ("\n" ASM_L(top) ":\n" \
2283 "\t" aors " $1, (%0)\n" \
2284 "\tleal 4(%0),%0\n" \
2285 "\tjc " ASM_L(top) \
2286 : "=r" (__ptr_dummy) \
2287 : "0" (ptr) \
2288 : "memory"); \
2290 else \
2292 __asm__ __volatile__ \
2293 ( aors " %2,(%0)\n" \
2294 "\tjnc " ASM_L(done) "\n" \
2295 ASM_L(top) ":\n" \
2296 "\t" aors " $1,4(%0)\n" \
2297 "\tleal 4(%0),%0\n" \
2298 "\tjc " ASM_L(top) "\n" \
2299 ASM_L(done) ":\n" \
2300 : "=r" (__ptr_dummy) \
2301 : "0" (ptr), \
2302 "ri" (incr) \
2303 : "memory"); \
2305 } while (0)
2307 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2308 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2309 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2310 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2311 #endif
2313 #if GMP_NAIL_BITS == 0
2314 #ifndef mpn_incr_u
2315 #define mpn_incr_u(p,incr) \
2316 do { \
2317 mp_limb_t __x; \
2318 mp_ptr __p = (p); \
2319 if (__builtin_constant_p (incr) && (incr) == 1) \
2320 { \
2321 while (++(*(__p++)) == 0) \
2322 ; \
2323 } \
2324 else \
2325 { \
2326 __x = *__p + (incr); \
2327 *__p = __x; \
2328 if (__x < (incr)) \
2329 while (++(*(++__p)) == 0) \
2330 ; \
2331 } \
2332 } while (0)
2333 #endif
2334 #ifndef mpn_decr_u
2335 #define mpn_decr_u(p,incr) \
2336 do { \
2337 mp_limb_t __x; \
2338 mp_ptr __p = (p); \
2339 if (__builtin_constant_p (incr) && (incr) == 1) \
2340 { \
2341 while ((*(__p++))-- == 0) \
2342 ; \
2343 } \
2344 else \
2345 { \
2346 __x = *__p; \
2347 *__p = __x - (incr); \
2348 if (__x < (incr)) \
2349 while ((*(++__p))-- == 0) \
2350 ; \
2351 } \
2352 } while (0)
2353 #endif
2354 #endif
2356 #if GMP_NAIL_BITS >= 1
2357 #ifndef mpn_incr_u
2358 #define mpn_incr_u(p,incr) \
2359 do { \
2360 mp_limb_t __x; \
2361 mp_ptr __p = (p); \
2362 if (__builtin_constant_p (incr) && (incr) == 1) \
2364 do \
2366 __x = (*__p + 1) & GMP_NUMB_MASK; \
2367 *__p++ = __x; \
2369 while (__x == 0); \
2371 else \
2373 __x = (*__p + (incr)); \
2374 *__p++ = __x & GMP_NUMB_MASK; \
2375 if (__x >> GMP_NUMB_BITS != 0) \
2377 do \
2379 __x = (*__p + 1) & GMP_NUMB_MASK; \
2380 *__p++ = __x; \
2382 while (__x == 0); \
2385 } while (0)
2386 #endif
2387 #ifndef mpn_decr_u
2388 #define mpn_decr_u(p,incr) \
2389 do { \
2390 mp_limb_t __x; \
2391 mp_ptr __p = (p); \
2392 if (__builtin_constant_p (incr) && (incr) == 1) \
2394 do \
2396 __x = *__p; \
2397 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2399 while (__x == 0); \
2401 else \
2403 __x = *__p - (incr); \
2404 *__p++ = __x & GMP_NUMB_MASK; \
2405 if (__x >> GMP_NUMB_BITS != 0) \
2407 do \
2409 __x = *__p; \
2410 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2412 while (__x == 0); \
2415 } while (0)
2416 #endif
2417 #endif
2419 #ifndef MPN_INCR_U
2420 #if WANT_ASSERT
2421 #define MPN_INCR_U(ptr, size, n) \
2422 do { \
2423 ASSERT ((size) >= 1); \
2424 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2425 } while (0)
2426 #else
2427 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2428 #endif
2429 #endif
2431 #ifndef MPN_DECR_U
2432 #if WANT_ASSERT
2433 #define MPN_DECR_U(ptr, size, n) \
2434 do { \
2435 ASSERT ((size) >= 1); \
2436 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2437 } while (0)
2438 #else
2439 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2440 #endif
2441 #endif
2444 /* Structure for conversion between internal binary format and
2445 strings in base 2..36. */
2446 struct bases
2447 {
2448 /* Number of digits in the conversion base that always fits in an mp_limb_t.
2449 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2450 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
2451 int chars_per_limb;
2453 /* log(2)/log(conversion_base) */
2454 double chars_per_bit_exactly;
2456 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2457 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2458 i.e. the number of bits used to represent each digit in the base. */
2459 mp_limb_t big_base;
2461 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2462 fixed-point number. Instead of dividing by big_base an application can
2463 choose to multiply by big_base_inverted. */
2464 mp_limb_t big_base_inverted;
2465 };
2467 #define mp_bases __MPN(bases)
2468 __GMP_DECLSPEC extern const struct bases mp_bases[257];
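/* For instance, with 32-bit limbs the base 10 entry has chars_per_limb 9,
   big_base 1000000000 (10^9, the largest power of 10 fitting in a limb),
   and chars_per_bit_exactly of roughly 0.30103 (log(2)/log(10)).  */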
2471 /* For power of 2 bases this is exact. For other bases the result is either
2472 exact or one too big.
2474 To be exact always it'd be necessary to examine all the limbs of the
2475 operand, since numbers like 100..000 and 99...999 generally differ only
2476 in the lowest limb. It'd be possible to examine just a couple of high
2477 limbs to increase the probability of being exact, but that doesn't seem
2478 worth bothering with. */
2480 #define MPN_SIZEINBASE(result, ptr, size, base) \
2481 do { \
2482 int __lb_base, __cnt; \
2483 size_t __totbits; \
2485 ASSERT ((size) >= 0); \
2486 ASSERT ((base) >= 2); \
2487 ASSERT ((base) < numberof (mp_bases)); \
2489 /* Special case for X == 0. */ \
2490 if ((size) == 0) \
2491 (result) = 1; \
2492 else \
2493 { \
2494 /* Calculate the total number of significant bits of X. */ \
2495 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2496 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2498 if (POW2_P (base)) \
2499 { \
2500 __lb_base = mp_bases[base].big_base; \
2501 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2502 } \
2503 else \
2504 (result) = (size_t) \
2505 (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
2506 } \
2507 } while (0)
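/* Worked example: a 128-bit operand (so __totbits == 128) gives
   (128+4-1)/4 = 32 digits in base 16, and (size_t) (128 * 0.30103) + 1 = 39
   digits in base 10, the latter exact or one too big as noted above.  */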
2509 /* eliminate mp_bases lookups for base==16 */
2510 #define MPN_SIZEINBASE_16(result, ptr, size) \
2511 do { \
2512 int __cnt; \
2513 mp_size_t __totbits; \
2515 ASSERT ((size) >= 0); \
2517 /* Special case for X == 0. */ \
2518 if ((size) == 0) \
2519 (result) = 1; \
2520 else \
2521 { \
2522 /* Calculate the total number of significant bits of X. */ \
2523 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2524 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2525 (result) = (__totbits + 4 - 1) / 4; \
2526 } \
2527 } while (0)
2529 /* bit count to limb count, rounding up */
2530 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2532 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2533 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2534 in both cases (LIMBS_PER_ULONG is also defined here.) */
2535 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2537 #define LIMBS_PER_ULONG 1
2538 #define MPN_SET_UI(zp, zn, u) \
2539 (zp)[0] = (u); \
2540 (zn) = ((zp)[0] != 0);
2541 #define MPZ_FAKE_UI(z, zp, u) \
2542 (zp)[0] = (u); \
2543 PTR (z) = (zp); \
2544 SIZ (z) = ((zp)[0] != 0); \
2545 ASSERT_CODE (ALLOC (z) = 1);
2547 #else /* need two limbs per ulong */
2549 #define LIMBS_PER_ULONG 2
2550 #define MPN_SET_UI(zp, zn, u) \
2551 (zp)[0] = (u) & GMP_NUMB_MASK; \
2552 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2553 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2554 #define MPZ_FAKE_UI(z, zp, u) \
2555 (zp)[0] = (u) & GMP_NUMB_MASK; \
2556 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2557 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2558 PTR (z) = (zp); \
2559 ASSERT_CODE (ALLOC (z) = 2);
2561 #endif
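/* Usage sketch for MPZ_FAKE_UI (illustrative): present an unsigned long u
   to an mpz routine that only reads its operand, without any allocation:

       mp_limb_t  zlimbs[LIMBS_PER_ULONG];
       mpz_t      z;
       MPZ_FAKE_UI (z, zlimbs, u);
       ... pass z as a read-only operand ...
*/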
2564 #if HAVE_HOST_CPU_FAMILY_x86
2565 #define TARGET_REGISTER_STARVED 1
2566 #else
2567 #define TARGET_REGISTER_STARVED 0
2568 #endif
2571 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2572 or 0 there into a limb 0xFF..FF or 0 respectively.
2574 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2575 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2576 a little compile-time test and a fallback to a "? :" form. The latter is
2577 necessary for instance on Cray vector systems.
2579 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2580 to an arithmetic right shift anyway, but it's good to get the desired
2581 shift on past versions too (in particular since an important use of
2582 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
2584 #define LIMB_HIGHBIT_TO_MASK(n) \
2585 (((mp_limb_signed_t) -1 >> 1) < 0 \
2586 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
2587 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
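/* For example LIMB_HIGHBIT_TO_MASK(GMP_LIMB_HIGHBIT) is MP_LIMB_T_MAX and
   LIMB_HIGHBIT_TO_MASK(1) is 0.  */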
2590 /* Use a library function for invert_limb, if available. */
2591 #define mpn_invert_limb __MPN(invert_limb)
2592 __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2593 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2594 #define invert_limb(invxl,xl) \
2595 do { \
2596 (invxl) = mpn_invert_limb (xl); \
2597 } while (0)
2598 #endif
2600 #ifndef invert_limb
2601 #define invert_limb(invxl,xl) \
2602 do { \
2603 mp_limb_t dummy; \
2604 ASSERT ((xl) != 0); \
2605 udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \
2606 } while (0)
2607 #endif
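/* Toy illustration of the quantity computed: with 4-bit limbs and xl == 9
   the generic code does udiv_qrnnd (invxl, dummy, 6, 15, 9), i.e. 111/9,
   giving invxl = 12 = floor ((2^8 - 1) / 9) - 2^4, which matches the DI
   described for udiv_qrnnd_preinv1 below.  */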
2609 #define invert_pi1(dinv, d1, d0) \
2610 do { \
2611 mp_limb_t v, p, t1, t0, mask; \
2612 invert_limb (v, d1); \
2613 p = d1 * v; \
2614 p += d0; \
2615 if (p < d0) \
2616 { \
2617 v--; \
2618 mask = -(p >= d1); \
2619 p -= d1; \
2620 v += mask; \
2621 p -= mask & d1; \
2622 } \
2623 umul_ppmm (t1, t0, d0, v); \
2624 p += t1; \
2625 if (p < t1) \
2626 { \
2627 v--; \
2628 if (UNLIKELY (p >= d1)) \
2629 { \
2630 if (p > d1 || t0 >= d0) \
2631 v--; \
2632 } \
2633 } \
2634 (dinv).inv32 = v; \
2635 } while (0)
2638 #ifndef udiv_qrnnd_preinv
2639 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3
2640 #endif
2642 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2643 limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
2644 If this would yield overflow, DI should be the largest possible number
2645 (i.e., only ones). For correct operation, the most significant bit of D
2646 has to be set. Put the quotient in Q and the remainder in R. */
2647 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
2648 do { \
2649 mp_limb_t _q, _ql, _r; \
2650 mp_limb_t _xh, _xl; \
2651 ASSERT ((d) != 0); \
2652 umul_ppmm (_q, _ql, (nh), (di)); \
2653 _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
2654 umul_ppmm (_xh, _xl, _q, (d)); \
2655 sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
2656 if (_xh != 0) \
2657 { \
2658 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
2659 _q += 1; \
2660 if (_xh != 0) \
2661 { \
2662 _r -= (d); \
2663 _q += 1; \
2664 } \
2665 } \
2666 if (_r >= (d)) \
2667 { \
2668 _r -= (d); \
2669 _q += 1; \
2670 } \
2671 (r) = _r; \
2672 (q) = _q; \
2673 } while (0)
2675 /* Like udiv_qrnnd_preinv, but branch-free. */
2676 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
2677 do { \
2678 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2679 mp_limb_t _xh, _xl; \
2680 _n2 = (nh); \
2681 _n10 = (nl); \
2682 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2683 _nadj = _n10 + (_nmask & (d)); \
2684 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2685 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2686 _q1 = ~_xh; \
2687 umul_ppmm (_xh, _xl, _q1, d); \
2688 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2689 _xh -= (d); /* xh = 0 or -1 */ \
2690 (r) = _xl + ((d) & _xh); \
2691 (q) = _xh - _q1; \
2692 } while (0)
2694 /* Like udiv_qrnnd_preinv2, but for any value D. DNORM is D shifted left
2695 so that its most significant bit is set. LGUP is ceil(log2(D)). */
2696 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2697 do { \
2698 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2699 mp_limb_t _xh, _xl; \
2700 _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> ((lgup) - 1)); \
2701 _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \
2702 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2703 _nadj = _n10 + (_nmask & (dnorm)); \
2704 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2705 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2706 _q1 = ~_xh; \
2707 umul_ppmm (_xh, _xl, _q1, d); \
2708 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2709 _xh -= (d); \
2710 (r) = _xl + ((d) & _xh); \
2711 (q) = _xh - _q1; \
2712 } while (0)
2714 /* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
2716 We write things strangely below, to help gcc. A more straightforward
2717 version:
2719 _r = (nl) - _qh * (d);
2720 _t = _r + (d);
2721 if (_r >= _ql)
2723 _qh--;
2724 _r = _t;
2727 For one operation shorter critical path, one may want to use this form:
2729 _p = _qh * (d)
2730 _s = (nl) + (d);
2731 _r = (nl) - _p;
2732 _t = _s - _p;
2733 if (_r >= _ql)
2735 _qh--;
2736 _r = _t;
2737 }
2738 */
2739 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
2740 do { \
2741 mp_limb_t _qh, _ql, _r; \
2742 umul_ppmm (_qh, _ql, (nh), (di)); \
2743 if (__builtin_constant_p (nl) && (nl) == 0) \
2744 _qh += (nh) + 1; \
2745 else \
2746 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
2747 _r = (nl) - _qh * (d); \
2748 if (_r > _ql) /* both > and >= should be OK */ \
2749 { \
2750 _r += (d); \
2751 _qh--; \
2752 } \
2753 if (UNLIKELY (_r >= (d))) \
2754 { \
2755 _r -= (d); \
2756 _qh++; \
2757 } \
2758 (r) = _r; \
2759 (q) = _qh; \
2760 } while (0)
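/* Usage sketch for the preinv macros above (illustrative names; d must be
   normalized, i.e. have its most significant bit set, and nh < d):

       mp_limb_t  dinv, q, r;
       invert_limb (dinv, d);
       udiv_qrnnd_preinv (q, r, nh, nl, d, dinv);

   leaving nh*B + nl == q*d + r with 0 <= r < d.  */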
2762 /* Compute r = nh*B mod d, where di is the inverse of d. */
2763 #define udiv_rnd_preinv(r, nh, d, di) \
2764 do { \
2765 mp_limb_t _qh, _ql, _r; \
2766 umul_ppmm (_qh, _ql, (nh), (di)); \
2767 _qh += (nh) + 1; \
2768 _r = - _qh * (d); \
2769 if (_r > _ql) \
2770 _r += (d); \
2771 (r) = _r; \
2772 } while (0)
2774 /* Compute the quotient and remainder for n / d. Requires d
2775 >= B^2 / 2 and n < d B. di is the inverse
2777 floor ((B^3 - 1) / (d0 + d1 B)) - B.
2779 NOTE: Output variables are updated multiple times. Only some inputs
2780 and outputs may overlap.
2781 */
2782 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
2783 do { \
2784 mp_limb_t _q0, _t1, _t0, _mask; \
2785 umul_ppmm ((q), _q0, (n2), (dinv)); \
2786 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
2788 /* Compute the two most significant limbs of n - q'd */ \
2789 (r1) = (n1) - (d1) * (q); \
2790 (r0) = (n0); \
2791 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2792 umul_ppmm (_t1, _t0, (d0), (q)); \
2793 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
2794 (q)++; \
2796 /* Conditionally adjust q and the remainders */ \
2797 _mask = - (mp_limb_t) ((r1) >= _q0); \
2798 (q) += _mask; \
2799 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
2800 if (UNLIKELY ((r1) >= (d1))) \
2801 { \
2802 if ((r1) > (d1) || (r0) >= (d0)) \
2803 { \
2804 (q)++; \
2805 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2806 } \
2807 } \
2808 } while (0)
2810 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
2811 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
2812 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2813 #endif
2816 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
2817 plain mpn_divrem_1. The default is yes, since the few CISC chips where
2818 preinv is not good have defines saying so. */
2819 #ifndef USE_PREINV_DIVREM_1
2820 #define USE_PREINV_DIVREM_1 1
2821 #endif
2823 #if USE_PREINV_DIVREM_1
2824 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2825 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2826 #else
2827 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2828 mpn_divrem_1 (qp, xsize, ap, size, d)
2829 #endif
2831 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
2832 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
2833 #endif
2835 /* This selection may seem backwards. The reason mpn_mod_1 typically takes
2836 over for larger sizes is that it uses the mod_1_1 function. */
2837 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
2838 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
2839 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
2840 : mpn_mod_1 (src, size, divisor))
2843 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
2844 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
2845 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2846 #endif
2849 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2850 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
2851 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
2852 modexact are faster at all sizes, so the defaults are 0. Those CPUs
2853 where this is not right have a tuned threshold. */
2854 #ifndef DIVEXACT_1_THRESHOLD
2855 #define DIVEXACT_1_THRESHOLD 0
2856 #endif
2857 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
2858 #define BMOD_1_TO_MOD_1_THRESHOLD 10
2859 #endif
2861 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
2862 #define mpn_divexact_1 __MPN(divexact_1)
2863 __GMP_DECLSPEC void mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2864 #endif
2866 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
2867 do { \
2868 if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
2869 ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2870 else \
2871 { \
2872 ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
2873 mpn_divexact_1 (dst, src, size, divisor); \
2874 } \
2875 } while (0)
2877 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
2878 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
2879 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2880 #endif
2882 #if HAVE_NATIVE_mpn_modexact_1_odd
2883 #define mpn_modexact_1_odd __MPN(modexact_1_odd)
2884 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2885 #else
2886 #define mpn_modexact_1_odd(src,size,divisor) \
2887 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2888 #endif
2890 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
2891 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
2892 ? mpn_modexact_1_odd (src, size, divisor) \
2893 : mpn_mod_1 (src, size, divisor))
2895 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
2896 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2897 n must be odd (otherwise such an inverse doesn't exist).
2899 This is not to be confused with invert_limb(), which is completely
2900 different.
2902 The table lookup gives an inverse with the low 8 bits valid, and each
2903 multiply step doubles the number of bits. See Jebelean "An algorithm for
2904 exact division" end of section 4 (reference in gmp.texi).
2906 Possible enhancement: Could use UHWtype until the last step, if half-size
2907 multiplies are faster (might help under _LONG_LONG_LIMB).
2909 Alternative: As noted in Granlund and Montgomery "Division by Invariant
2910 Integers using Multiplication" (reference in gmp.texi), n itself gives a
2911 3-bit inverse immediately, and could be used instead of a table lookup.
2912 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2913 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
2915 #define binvert_limb_table __gmp_binvert_limb_table
2916 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
2918 #define binvert_limb(inv,n) \
2919 do { \
2920 mp_limb_t __n = (n); \
2921 mp_limb_t __inv; \
2922 ASSERT ((__n & 1) == 1); \
2924 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
2925 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
2926 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
2927 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
2929 if (GMP_NUMB_BITS > 64) \
2930 { \
2931 int __invbits = 64; \
2932 do { \
2933 __inv = 2 * __inv - __inv * __inv * __n; \
2934 __invbits *= 2; \
2935 } while (__invbits < GMP_NUMB_BITS); \
2936 } \
2938 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
2939 (inv) = __inv & GMP_NUMB_MASK; \
2940 } while (0)
2941 #define modlimb_invert binvert_limb /* backward compatibility */
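/* Worked example (assuming GMP_NUMB_BITS >= 16): for n == 7 the table
   lookup yields 183, since 7*183 == 5*256 + 1, an inverse good to 8 bits.
   One Newton step 2*183 - 183*183*7 == 28087 (mod 2^16) then gives
   7*28087 == 3*2^16 + 1, an inverse good to 16 bits, and so on up to
   GMP_NUMB_BITS.  */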
2943 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2944 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2945 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2946 we need to start from GMP_NUMB_MAX>>1. */
2947 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2949 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2950 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2951 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
2952 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2955 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
2957 It's not clear whether this is the best way to do this calculation.
2958 Anything congruent to -a would be fine for the one limb congruence
2959 tests. */
2961 #define NEG_MOD(r, a, d) \
2962 do { \
2963 ASSERT ((d) != 0); \
2964 ASSERT_LIMB (a); \
2965 ASSERT_LIMB (d); \
2967 if ((a) <= (d)) \
2968 { \
2969 /* small a is reasonably likely */ \
2970 (r) = (d) - (a); \
2971 } \
2972 else \
2973 { \
2974 unsigned __twos; \
2975 mp_limb_t __dnorm; \
2976 count_leading_zeros (__twos, d); \
2977 __twos -= GMP_NAIL_BITS; \
2978 __dnorm = (d) << __twos; \
2979 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
2980 } \
2982 ASSERT_LIMB (r); \
2983 } while (0)
2985 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2986 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
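/* E.g. LOW_ZEROS_MASK(0x28) is 0x7, and LOW_ZEROS_MASK(1) is 0.  */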
2989 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2990 to 0 if there's an even number. "n" should be an unsigned long and "p"
2991 an int. */
2993 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2994 #define ULONG_PARITY(p, n) \
2995 do { \
2996 int __p; \
2997 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
2998 (p) = __p & 1; \
2999 } while (0)
3000 #endif
3002 /* Cray intrinsic _popcnt. */
3003 #ifdef _CRAY
3004 #define ULONG_PARITY(p, n) \
3005 do { \
3006 (p) = _popcnt (n) & 1; \
3007 } while (0)
3008 #endif
3010 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3011 && ! defined (NO_ASM) && defined (__ia64)
3012 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3013 to a 64 bit unsigned long long for popcnt */
3014 #define ULONG_PARITY(p, n) \
3015 do { \
3016 unsigned long long __n = (unsigned long) (n); \
3017 int __p; \
3018 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3019 (p) = __p & 1; \
3020 } while (0)
3021 #endif
3023 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3024 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3025 #if __GMP_GNUC_PREREQ (3,1)
3026 #define __GMP_qm "=Qm"
3027 #define __GMP_q "=Q"
3028 #else
3029 #define __GMP_qm "=qm"
3030 #define __GMP_q "=q"
3031 #endif
3032 #define ULONG_PARITY(p, n) \
3033 do { \
3034 char __p; \
3035 unsigned long __n = (n); \
3036 __n ^= (__n >> 16); \
3037 __asm__ ("xorb %h1, %b1\n\t" \
3038 "setpo %0" \
3039 : __GMP_qm (__p), __GMP_q (__n) \
3040 : "1" (__n)); \
3041 (p) = __p; \
3042 } while (0)
3043 #endif
3045 #if ! defined (ULONG_PARITY)
3046 #define ULONG_PARITY(p, n) \
3047 do { \
3048 unsigned long __n = (n); \
3049 int __p = 0; \
3050 do \
3051 { \
3052 __p ^= 0x96696996L >> (__n & 0x1F); \
3053 __n >>= 5; \
3054 } \
3055 while (__n != 0); \
3057 (p) = __p & 1; \
3058 } while (0)
3059 #endif
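/* In the generic version above, bit i of the constant 0x96696996 is the
   parity of i, so xoring in the constant shifted by successive 5-bit chunks
   of __n and keeping bit 0 accumulates the overall parity.  */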
3062 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3063 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3064 anything other than bit-fields, so use "asm". */
3065 #if defined (__GNUC__) && ! defined (NO_ASM) \
3066 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3067 #define BSWAP_LIMB(dst, src) \
3068 do { \
3069 mp_limb_t __bswapl_src = (src); \
3070 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3071 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3072 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3073 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3074 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 2nd high */ \
3075 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3076 (dst) = __tmp1 | __tmp2; /* whole */ \
3077 } while (0)
3078 #endif
3080 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
3081 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3082 kernel with xchgb instead of rorw), but this is not done here, because
3083 i386 means generic x86 and mixing word and dword operations will cause
3084 partial register stalls on P6 chips. */
3085 #if defined (__GNUC__) && ! defined (NO_ASM) \
3086 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3087 && GMP_LIMB_BITS == 32
3088 #define BSWAP_LIMB(dst, src) \
3089 do { \
3090 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3091 } while (0)
3092 #endif
3094 #if defined (__GNUC__) && ! defined (NO_ASM) \
3095 && defined (__amd64__) && GMP_LIMB_BITS == 64
3096 #define BSWAP_LIMB(dst, src) \
3097 do { \
3098 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3099 } while (0)
3100 #endif
3102 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3103 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3104 #define BSWAP_LIMB(dst, src) \
3105 do { \
3106 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3107 } while (0)
3108 #endif
3110 /* As per glibc. */
3111 #if defined (__GNUC__) && ! defined (NO_ASM) \
3112 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3113 #define BSWAP_LIMB(dst, src) \
3114 do { \
3115 mp_limb_t __bswapl_src = (src); \
3116 __asm__ ("ror%.w %#8, %0\n\t" \
3117 "swap %0\n\t" \
3118 "ror%.w %#8, %0" \
3119 : "=d" (dst) \
3120 : "0" (__bswapl_src)); \
3121 } while (0)
3122 #endif
3124 #if ! defined (BSWAP_LIMB)
3125 #if GMP_LIMB_BITS == 8
3126 #define BSWAP_LIMB(dst, src) \
3127 do { (dst) = (src); } while (0)
3128 #endif
3129 #if GMP_LIMB_BITS == 16
3130 #define BSWAP_LIMB(dst, src) \
3131 do { \
3132 (dst) = ((src) << 8) + ((src) >> 8); \
3133 } while (0)
3134 #endif
3135 #if GMP_LIMB_BITS == 32
3136 #define BSWAP_LIMB(dst, src) \
3137 do { \
3138 (dst) = \
3139 ((src) << 24) \
3140 + (((src) & 0xFF00) << 8) \
3141 + (((src) >> 8) & 0xFF00) \
3142 + ((src) >> 24); \
3143 } while (0)
3144 #endif
3145 #if GMP_LIMB_BITS == 64
3146 #define BSWAP_LIMB(dst, src) \
3147 do { \
3148 (dst) = \
3149 ((src) << 56) \
3150 + (((src) & 0xFF00) << 40) \
3151 + (((src) & 0xFF0000) << 24) \
3152 + (((src) & 0xFF000000) << 8) \
3153 + (((src) >> 8) & 0xFF000000) \
3154 + (((src) >> 24) & 0xFF0000) \
3155 + (((src) >> 40) & 0xFF00) \
3156 + ((src) >> 56); \
3157 } while (0)
3158 #endif
3159 #endif
3161 #if ! defined (BSWAP_LIMB)
3162 #define BSWAP_LIMB(dst, src) \
3163 do { \
3164 mp_limb_t __bswapl_src = (src); \
3165 mp_limb_t __dst = 0; \
3166 int __i; \
3167 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
3168 { \
3169 __dst = (__dst << 8) | (__bswapl_src & 0xFF); \
3170 __bswapl_src >>= 8; \
3171 } \
3172 (dst) = __dst; \
3173 } while (0)
3174 #endif
3177 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3178 those we know are fast. */
3179 #if defined (__GNUC__) && ! defined (NO_ASM) \
3180 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3181 && (HAVE_HOST_CPU_powerpc604 \
3182 || HAVE_HOST_CPU_powerpc604e \
3183 || HAVE_HOST_CPU_powerpc750 \
3184 || HAVE_HOST_CPU_powerpc7400)
3185 #define BSWAP_LIMB_FETCH(limb, src) \
3186 do { \
3187 mp_srcptr __blf_src = (src); \
3188 mp_limb_t __limb; \
3189 __asm__ ("lwbrx %0, 0, %1" \
3190 : "=r" (__limb) \
3191 : "r" (__blf_src), \
3192 "m" (*__blf_src)); \
3193 (limb) = __limb; \
3194 } while (0)
3195 #endif
3197 #if ! defined (BSWAP_LIMB_FETCH)
3198 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3199 #endif
3202 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3203 know are fast. FIXME: Is this necessary? */
3204 #if defined (__GNUC__) && ! defined (NO_ASM) \
3205 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3206 && (HAVE_HOST_CPU_powerpc604 \
3207 || HAVE_HOST_CPU_powerpc604e \
3208 || HAVE_HOST_CPU_powerpc750 \
3209 || HAVE_HOST_CPU_powerpc7400)
3210 #define BSWAP_LIMB_STORE(dst, limb) \
3211 do { \
3212 mp_ptr __dst = (dst); \
3213 mp_limb_t __limb = (limb); \
3214 __asm__ ("stwbrx %1, 0, %2" \
3215 : "=m" (*__dst) \
3216 : "r" (__limb), \
3217 "r" (__dst)); \
3218 } while (0)
3219 #endif
3221 #if ! defined (BSWAP_LIMB_STORE)
3222 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3223 #endif
3226 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3227 #define MPN_BSWAP(dst, src, size) \
3228 do { \
3229 mp_ptr __dst = (dst); \
3230 mp_srcptr __src = (src); \
3231 mp_size_t __size = (size); \
3232 mp_size_t __i; \
3233 ASSERT ((size) >= 0); \
3234 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3235 CRAY_Pragma ("_CRI ivdep"); \
3236 for (__i = 0; __i < __size; __i++) \
3237 { \
3238 BSWAP_LIMB_FETCH (*__dst, __src); \
3239 __dst++; \
3240 __src++; \
3241 } \
3242 } while (0)
3244 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3245 #define MPN_BSWAP_REVERSE(dst, src, size) \
3246 do { \
3247 mp_ptr __dst = (dst); \
3248 mp_size_t __size = (size); \
3249 mp_srcptr __src = (src) + __size - 1; \
3250 mp_size_t __i; \
3251 ASSERT ((size) >= 0); \
3252 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3253 CRAY_Pragma ("_CRI ivdep"); \
3254 for (__i = 0; __i < __size; __i++) \
3255 { \
3256 BSWAP_LIMB_FETCH (*__dst, __src); \
3257 __dst++; \
3258 __src--; \
3259 } \
3260 } while (0)
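#if 0
/* Illustrative sketch only, not part of the library ("a" is hypothetical):
   byte swap a small limb vector in place.  MPN_BSWAP allows dst == src, per
   its MPN_SAME_OR_SEPARATE_P assertion; MPN_BSWAP_REVERSE requires the
   operands not to overlap.  */
  mp_limb_t a[2] = { 1, 2 };
  MPN_BSWAP (a, a, (mp_size_t) 2);
#endif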
3263 /* No processor claiming to be SPARC v9 compliant seems to
3264 implement the POPC instruction. Disable pattern for now. */
3265 #if 0
3266 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3267 #define popc_limb(result, input) \
3268 do { \
3269 DItype __res; \
3270 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3271 } while (0)
3272 #endif
3273 #endif
3275 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3276 #define popc_limb(result, input) \
3277 do { \
3278 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3279 } while (0)
3280 #endif
3282 /* Cray intrinsic. */
3283 #ifdef _CRAY
3284 #define popc_limb(result, input) \
3285 do { \
3286 (result) = _popcnt (input); \
3287 } while (0)
3288 #endif
3290 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3291 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3292 #define popc_limb(result, input) \
3293 do { \
3294 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3295 } while (0)
3296 #endif
3298 /* Cool population count of an mp_limb_t.
3299 You have to figure out how this works; we won't tell you!
3301 The constants could also be expressed as:
3302 0x55... = [2^N / 3] = [(2^N-1)/3]
3303 0x33... = [2^N / 5] = [(2^N-1)/5]
3304 0x0f... = [2^N / 17] = [(2^N-1)/17]
3305 (N is GMP_LIMB_BITS, [] denotes truncation.) */
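/* As a worked check of those constants for GMP_LIMB_BITS == 32:
   MP_LIMB_T_MAX/3 = 0x55555555, MP_LIMB_T_MAX/5 = 0x33333333 and
   MP_LIMB_T_MAX/17 = 0x0F0F0F0F, which are exactly [2^32/3], [2^32/5]
   and [2^32/17].  */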
3307 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3308 #define popc_limb(result, input) \
3309 do { \
3310 mp_limb_t __x = (input); \
3311 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3312 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3313 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3314 (result) = __x & 0xff; \
3315 } while (0)
3316 #endif
3318 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3319 #define popc_limb(result, input) \
3320 do { \
3321 mp_limb_t __x = (input); \
3322 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3323 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3324 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3325 __x = ((__x >> 8) + __x); \
3326 (result) = __x & 0xff; \
3327 } while (0)
3328 #endif
3330 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3331 #define popc_limb(result, input) \
3332 do { \
3333 mp_limb_t __x = (input); \
3334 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3335 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3336 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3337 __x = ((__x >> 8) + __x); \
3338 __x = ((__x >> 16) + __x); \
3339 (result) = __x & 0xff; \
3340 } while (0)
3341 #endif
3343 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3344 #define popc_limb(result, input) \
3345 do { \
3346 mp_limb_t __x = (input); \
3347 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3348 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3349 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3350 __x = ((__x >> 8) + __x); \
3351 __x = ((__x >> 16) + __x); \
3352 __x = ((__x >> 32) + __x); \
3353 (result) = __x & 0xff; \
3354 } while (0)
3355 #endif
3358 /* Define stuff for longlong.h. */
3359 #if HAVE_ATTRIBUTE_MODE
3360 typedef unsigned int UQItype __attribute__ ((mode (QI)));
3361 typedef int SItype __attribute__ ((mode (SI)));
3362 typedef unsigned int USItype __attribute__ ((mode (SI)));
3363 typedef int DItype __attribute__ ((mode (DI)));
3364 typedef unsigned int UDItype __attribute__ ((mode (DI)));
3365 #else
3366 typedef unsigned char UQItype;
3367 typedef long SItype;
3368 typedef unsigned long USItype;
3369 #if HAVE_LONG_LONG
3370 typedef long long int DItype;
3371 typedef unsigned long long int UDItype;
3372 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3373 typedef long int DItype;
3374 typedef unsigned long int UDItype;
3375 #endif
3376 #endif
3378 typedef mp_limb_t UWtype;
3379 typedef unsigned int UHWtype;
3380 #define W_TYPE_SIZE GMP_LIMB_BITS
3382 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3384 Bit field packing is "implementation defined" according to C99, which
3385 leaves us at the compiler's mercy here. For some systems packing is
3386 defined in the ABI (eg. x86). In any case so far it seems universal that
3387 little endian systems pack from low to high, and big endian from high to
3388 low within the given type.
3390 Within the fields we rely on the integer endianness being the same as the
3391 float endianness, this is true everywhere we know of and it'd be a fairly
3392 strange system that did anything else. */
3394 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3395 #define _GMP_IEEE_FLOATS 1
3396 union ieee_double_extract
3397 {
3398 struct
3399 {
3400 gmp_uint_least32_t manh:20;
3401 gmp_uint_least32_t exp:11;
3402 gmp_uint_least32_t sig:1;
3403 gmp_uint_least32_t manl:32;
3404 } s;
3405 double d;
3406 };
3407 #endif
3409 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3410 #define _GMP_IEEE_FLOATS 1
3411 union ieee_double_extract
3412 {
3413 struct
3414 {
3415 gmp_uint_least32_t manl:32;
3416 gmp_uint_least32_t manh:20;
3417 gmp_uint_least32_t exp:11;
3418 gmp_uint_least32_t sig:1;
3419 } s;
3420 double d;
3421 };
3422 #endif
3424 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3425 #define _GMP_IEEE_FLOATS 1
3426 union ieee_double_extract
3427 {
3428 struct
3429 {
3430 gmp_uint_least32_t sig:1;
3431 gmp_uint_least32_t exp:11;
3432 gmp_uint_least32_t manh:20;
3433 gmp_uint_least32_t manl:32;
3434 } s;
3435 double d;
3436 };
3437 #endif
3440 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3441 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3442 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3443 /* Maximum number of limbs it will take to store any `double'.
3444 We assume doubles have 53 mantissa bits. */
3445 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3447 __GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
3449 #define mpn_get_d __gmpn_get_d
3450 __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
3453 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3454 a_inf if x is an infinity. Both are considered unlikely values, for
3455 branch prediction. */
3457 #if _GMP_IEEE_FLOATS
3458 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3459 do { \
3460 union ieee_double_extract u; \
3461 u.d = (x); \
3462 if (UNLIKELY (u.s.exp == 0x7FF)) \
3463 { \
3464 if (u.s.manl == 0 && u.s.manh == 0) \
3465 { a_inf; } \
3466 else \
3467 { a_nan; } \
3468 } \
3469 } while (0)
3470 #endif
3472 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3473 /* no nans or infs in these formats */
3474 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3475 do { } while (0)
3476 #endif
3478 #ifndef DOUBLE_NAN_INF_ACTION
3479 /* Unknown format, try something generic.
3480 NaN should be "unordered", so x!=x.
3481 Inf should be bigger than DBL_MAX. */
3482 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3483 do { \
3485 if (UNLIKELY ((x) != (x))) \
3486 { a_nan; } \
3487 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3488 { a_inf; } \
3490 } while (0)
3491 #endif
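#if 0
/* Illustrative sketch only ("d" and "handle_infinity" are hypothetical):
   the second and third arguments are statements, run for NaN and infinity
   respectively; the macro falls straight through for finite values.  */
  DOUBLE_NAN_INF_ACTION (d,
                         __gmp_invalid_operation (),
                         goto handle_infinity);
#endif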
3493 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3494 in the coprocessor, which means a bigger exponent range than normal, and
3495 depending on the rounding mode, a bigger mantissa than normal. (See
3496 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3497 "d" through memory to force any rounding and overflows to occur.
3499 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3500 registers, where there's no such extra precision and no need for the
3501 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3502 FORCE_DOUBLE are only in test programs and default generic C code.
3504 Not quite sure that an "automatic volatile" will use memory, but it does
3505 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3506 apparently matching operands like "0" are only allowed on a register
3507 output. gcc 3.4 warns about this, though in fact it and past versions
3508 seem to put the operand through memory as hoped. */
3510 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3511 || defined (__amd64__))
3512 #define FORCE_DOUBLE(d) \
3513 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3514 #else
3515 #define FORCE_DOUBLE(d) do { } while (0)
3516 #endif
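#if 0
/* Illustrative sketch only ("x" and "y" are hypothetical): round a product
   back to plain IEEE double before inspecting it, in case the compiler kept
   it in an x87 register at extended precision.  */
  double p = x * y;
  FORCE_DOUBLE (p);
#endif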
3519 __GMP_DECLSPEC extern int __gmp_junk;
3520 __GMP_DECLSPEC extern const int __gmp_0;
3521 __GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
3522 __GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3523 __GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3524 __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3525 #define GMP_ERROR(code) __gmp_exception (code)
3526 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3527 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
3529 #if defined _LONG_LONG_LIMB
3530 #if __GMP_HAVE_TOKEN_PASTE
3531 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3532 #else
3533 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
3534 #endif
3535 #else /* not _LONG_LONG_LIMB */
3536 #if __GMP_HAVE_TOKEN_PASTE
3537 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3538 #else
3539 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
3540 #endif
3541 #endif /* _LONG_LONG_LIMB */
3543 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3544 #if GMP_NUMB_BITS == 2
3545 #define PP 0x3 /* 3 */
3546 #define PP_FIRST_OMITTED 5
3547 #endif
3548 #if GMP_NUMB_BITS == 4
3549 #define PP 0xF /* 3 x 5 */
3550 #define PP_FIRST_OMITTED 7
3551 #endif
3552 #if GMP_NUMB_BITS == 8
3553 #define PP 0x69 /* 3 x 5 x 7 */
3554 #define PP_FIRST_OMITTED 11
3555 #endif
3556 #if GMP_NUMB_BITS == 16
3557 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3558 #define PP_FIRST_OMITTED 17
3559 #endif
3560 #if GMP_NUMB_BITS == 32
3561 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3562 #define PP_INVERTED 0x53E5645CL
3563 #define PP_FIRST_OMITTED 31
3564 #endif
3565 #if GMP_NUMB_BITS == 64
3566 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3567 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3568 #define PP_FIRST_OMITTED 59
3569 #endif
3570 #ifndef PP_FIRST_OMITTED
3571 #define PP_FIRST_OMITTED 3
3572 #endif
3576 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3577 zero bit representing +1 and a one bit representing -1. Bits other than
3578 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3579 used to ensure the expressions are "int"s even if a and/or b might be
3580 other types.
3582 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3583 and their speed is important. Expressions are used rather than
3584 conditionals to accumulate sign changes, which effectively means XORs
3585 instead of conditional JUMPs. */
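/* A small worked example of the convention: JACOBI_RECIP_UU_BIT1 (3, 3)
   below gives 3 & 3 = 3, whose bit 1 is set, so it stands for -1; and
   JACOBI_BIT1_TO_PN (3) = 1 - (3 & 2) = -1, matching (-1)^((3-1)*(3-1)/4).  */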
3587 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3588 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3590 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3591 #define JACOBI_U0(a) ((a) == 1)
3593 /* (a/0), with a given by low and size;
3594 is 1 if a=+/-1, 0 otherwise */
3595 #define JACOBI_LS0(alow,asize) \
3596 (((asize) == 1 || (asize) == -1) && (alow) == 1)
3598 /* (a/0), with a an mpz_t;
3599 fetch of low limb always valid, even if size is zero */
3600 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
3602 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3603 #define JACOBI_0U(b) ((b) == 1)
3605 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3606 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
3608 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3609 #define JACOBI_0LS(blow,bsize) \
3610 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3612 /* Convert a bit1 to +1 or -1. */
3613 #define JACOBI_BIT1_TO_PN(result_bit1) \
3614 (1 - ((int) (result_bit1) & 2))
3616 /* (2/b), with b unsigned and odd;
3617 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3618 hence obtained from (b>>1)^b */
3619 #define JACOBI_TWO_U_BIT1(b) \
3620 ((int) (((b) >> 1) ^ (b)))
3622 /* (2/b)^twos, with b unsigned and odd */
3623 #define JACOBI_TWOS_U_BIT1(twos, b) \
3624 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3626 /* (2/b)^twos, with b unsigned and odd */
3627 #define JACOBI_TWOS_U(twos, b) \
3628 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3630 /* (-1/b), with b odd (signed or unsigned);
3631 is (-1)^((b-1)/2) */
3632 #define JACOBI_N1B_BIT1(b) \
3633 ((int) (b))
3635 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3636 is (-1/b) if a<0, or +1 if a>=0 */
3637 #define JACOBI_ASGN_SU_BIT1(a, b) \
3638 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3640 /* (a/b) effect due to sign of b: signed/signed;
3641 is -1 if a and b both negative, +1 otherwise */
3642 #define JACOBI_BSGN_SS_BIT1(a, b) \
3643 ((((a)<0) & ((b)<0)) << 1)
3645 /* (a/b) effect due to sign of b: signed/mpz;
3646 is -1 if a and b both negative, +1 otherwise */
3647 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3648 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3650 /* (a/b) effect due to sign of b: mpz/signed;
3651 is -1 if a and b both negative, +1 otherwise */
3652 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3653 JACOBI_BSGN_SZ_BIT1 (b, a)
3655 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3656 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3657 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
3658 because this is used in a couple of places with only bit 1 of a or b
3659 valid. */
3660 #define JACOBI_RECIP_UU_BIT1(a, b) \
3661 ((int) ((a) & (b)))
3663 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3664 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
3665 updated for the new b_ptr. result_bit1 is updated according to the
3666 factors of 2 stripped, as per (a/2). */
3667 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
3668 do { \
3669 ASSERT ((b_size) >= 1); \
3670 ASSERT ((b_low) == (b_ptr)[0]); \
3672 while (UNLIKELY ((b_low) == 0)) \
3673 { \
3674 (b_size)--; \
3675 ASSERT ((b_size) >= 1); \
3676 (b_ptr)++; \
3677 (b_low) = *(b_ptr); \
3679 ASSERT (((a) & 1) != 0); \
3680 if ((GMP_NUMB_BITS % 2) == 1) \
3681 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
3682 } \
3683 } while (0)
3685 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3686 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
3687 unsigned. modexact_1_odd effectively calculates -a mod b, and
3688 result_bit1 is adjusted for the factor of -1.
3690 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3691 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3692 factor to introduce into result_bit1, so for that case use mpn_mod_1
3693 unconditionally.
3695 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3696 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
3697 or not skip a divide step, or something. */
3699 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3700 do { \
3701 mp_srcptr __a_ptr = (a_ptr); \
3702 mp_size_t __a_size = (a_size); \
3703 mp_limb_t __b = (b); \
3705 ASSERT (__a_size >= 1); \
3706 ASSERT (__b & 1); \
3708 if ((GMP_NUMB_BITS % 2) != 0 \
3709 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
3710 { \
3711 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
3712 } \
3713 else \
3714 { \
3715 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
3716 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
3717 } \
3718 } while (0)
3720 /* Matrix multiplication */
3721 #define mpn_matrix22_mul __MPN(matrix22_mul)
3722 __GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3723 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
3724 __GMP_DECLSPEC void mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3725 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
3726 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
3728 #ifndef MATRIX22_STRASSEN_THRESHOLD
3729 #define MATRIX22_STRASSEN_THRESHOLD 30
3730 #endif
3732 /* HGCD definitions */
3734 /* Extract one numb, shifting count bits left
3735 ________ ________
3736 |___xh___||___xl___|
3737 |____r____|
3738 >count <
3740 The count includes any nail bits, so it should work fine if count
3741 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
3742 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
3744 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
3745 those calls where the count high bits of xh may be non-zero.
3746 */
3748 #define MPN_EXTRACT_NUMB(count, xh, xl) \
3749 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
3750 ((xl) >> (GMP_LIMB_BITS - (count))))
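/* A worked value (assuming GMP_NAIL_BITS == 0 and GMP_LIMB_BITS == 64):
   MPN_EXTRACT_NUMB (8, CNST_LIMB(0xAB), CNST_LIMB(0xCD) << 56)
   = (0xAB << 8) | 0xCD = 0xABCD.  */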
3753 /* The non-negative matrix M = (u, u'; v,v') keeps track of the
3754 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
3755 than a, b. The determinant must always be one, so that M has an
3756 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
3757 bits. */
3758 struct hgcd_matrix1
3759 {
3760 mp_limb_t u[2][2];
3761 };
3763 #define mpn_hgcd2 __MPN (hgcd2)
3764 __GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
3766 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
3767 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3769 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
3770 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3772 struct hgcd_matrix
3773 {
3774 mp_size_t alloc; /* for sanity checking only */
3775 mp_size_t n;
3776 mp_ptr p[2][2];
3777 };
3779 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3781 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3782 __GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3784 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3785 __GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3787 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3788 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3790 #define mpn_hgcd_itch __MPN (hgcd_itch)
3791 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3793 #define mpn_hgcd __MPN (hgcd)
3794 __GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3796 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3798 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3799 __GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3801 /* Needs storage for the quotient */
3802 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
3804 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
3805 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3807 #define MPN_GCD_LEHMER_N_ITCH(n) (n)
3809 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
3810 __GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3812 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
3813 __GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
3815 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
3817 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
3818 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3820 /* 4*(an + 1) + 4*(bn + 1) + an */
3821 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
3823 #ifndef HGCD_THRESHOLD
3824 #define HGCD_THRESHOLD 400
3825 #endif
3827 #ifndef GCD_DC_THRESHOLD
3828 #define GCD_DC_THRESHOLD 1000
3829 #endif
3831 #ifndef GCDEXT_DC_THRESHOLD
3832 #define GCDEXT_DC_THRESHOLD 600
3833 #endif
3835 /* Definitions for mpn_set_str and mpn_get_str */
3836 struct powers
3837 {
3838 mp_ptr p; /* actual power value */
3839 mp_size_t n; /* # of limbs at p */
3840 mp_size_t shift; /* weight of lowest limb, in limb base B */
3841 size_t digits_in_base; /* number of corresponding digits */
3842 int base;
3843 };
3844 typedef struct powers powers_t;
3845 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
3846 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
3847 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
3848 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
3850 #define mpn_dc_set_str __MPN(dc_set_str)
3851 __GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
3852 #define mpn_bc_set_str __MPN(bc_set_str)
3853 __GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
3854 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
3855 __GMP_DECLSPEC void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
3858 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3859 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
3860 hence giving back the user's size in bits rounded up. Notice that
3861 converting prec->bits->prec gives an unchanged value. */
3862 #define __GMPF_BITS_TO_PREC(n) \
3863 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3864 #define __GMPF_PREC_TO_BITS(n) \
3865 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
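/* For example (worked, assuming GMP_NUMB_BITS == 64):
   __GMPF_BITS_TO_PREC(53) = (53 + 127) / 64 = 2 limbs, and
   __GMPF_PREC_TO_BITS(2) = 2*64 - 64 = 64 bits; feeding 64 back in gives
   (64 + 127) / 64 = 2 again, so prec -> bits -> prec is indeed unchanged.  */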
3867 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
3870 /* Set n to the number of significant digits an mpf with the given _mp_prec
3871 field can hold, in the given base. This is a rounded-up value, designed to ensure
3872 there are enough digits to reproduce all the guaranteed part of the value.
3874 There are prec many limbs, but the high might be only "1" so forget it
3875 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
3876 further +1 is because the limbs usually won't fall on digit boundaries.
3878 FIXME: If base is a power of 2 and the bits per digit divides
3879 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
3880 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3882 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
3883 do { \
3884 ASSERT (base >= 2 && base < numberof (mp_bases)); \
3885 (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \
3886 * mp_bases[(base)].chars_per_bit_exactly); \
3887 } while (0)
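/* Worked example (assuming GMP_NUMB_BITS == 64 and
   mp_bases[10].chars_per_bit_exactly ~= 0.30103): with prec == 3,
   n = 2 + (size_t) (2*64 * 0.30103) = 2 + 38 = 40 decimal digits.  */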
3890 /* Decimal point string, from the current C locale. Needs <langinfo.h> for
3891 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
3892 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3893 their respective #if HAVE_FOO_H.
3895 GLIBC recommends nl_langinfo because getting only one facet can be
3896 faster, apparently. */
3898 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3899 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3900 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
3901 #endif
3902 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3903 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3904 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
3905 #endif
3906 /* localeconv is slower since it returns all locale stuff */
3907 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3908 #define GMP_DECIMAL_POINT (localeconv()->decimal_point)
3909 #endif
3910 #if ! defined (GMP_DECIMAL_POINT)
3911 #define GMP_DECIMAL_POINT (".")
3912 #endif
3915 #define DOPRNT_CONV_FIXED 1
3916 #define DOPRNT_CONV_SCIENTIFIC 2
3917 #define DOPRNT_CONV_GENERAL 3
3919 #define DOPRNT_JUSTIFY_NONE 0
3920 #define DOPRNT_JUSTIFY_LEFT 1
3921 #define DOPRNT_JUSTIFY_RIGHT 2
3922 #define DOPRNT_JUSTIFY_INTERNAL 3
3924 #define DOPRNT_SHOWBASE_YES 1
3925 #define DOPRNT_SHOWBASE_NO 2
3926 #define DOPRNT_SHOWBASE_NONZERO 3
3928 struct doprnt_params_t {
3929 int base; /* negative for upper case */
3930 int conv; /* choices above */
3931 const char *expfmt; /* exponent format */
3932 int exptimes4; /* exponent multiply by 4 */
3933 char fill; /* character */
3934 int justify; /* choices above */
3935 int prec; /* prec field, or -1 for all digits */
3936 int showbase; /* choices above */
3937 int showpoint; /* if radix point always shown */
3938 int showtrailing; /* if trailing zeros wanted */
3939 char sign; /* '+', ' ', or '\0' */
3940 int width; /* width field */
3941 };
3943 #if _GMP_H_HAVE_VA_LIST
3945 __GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
3946 __GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
3947 __GMP_DECLSPEC typedef int (*doprnt_reps_t) __GMP_PROTO ((void *, int, int));
3948 __GMP_DECLSPEC typedef int (*doprnt_final_t) __GMP_PROTO ((void *));
3950 struct doprnt_funs_t {
3951 doprnt_format_t format;
3952 doprnt_memory_t memory;
3953 doprnt_reps_t reps;
3954 doprnt_final_t final; /* NULL if not required */
3955 };
3957 extern const struct doprnt_funs_t __gmp_fprintf_funs;
3958 extern const struct doprnt_funs_t __gmp_sprintf_funs;
3959 extern const struct doprnt_funs_t __gmp_snprintf_funs;
3960 extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
3961 extern const struct doprnt_funs_t __gmp_ostream_funs;
3963 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
3964 "size" of these have been written. "alloc > size" is maintained, so
3965 there's room to store a '\0' at the end. "result" is where the
3966 application wants the final block pointer. */
3967 struct gmp_asprintf_t {
3968 char **result;
3969 char *buf;
3970 size_t size;
3971 size_t alloc;
3972 };
3974 #define GMP_ASPRINTF_T_INIT(d, output) \
3975 do { \
3976 (d).result = (output); \
3977 (d).alloc = 256; \
3978 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
3979 (d).size = 0; \
3980 } while (0)
3982 /* If a realloc is necessary, use twice the size actually required, so as to
3983 avoid repeated small reallocs. */
3984 #define GMP_ASPRINTF_T_NEED(d, n) \
3985 do { \
3986 size_t alloc, newsize, newalloc; \
3987 ASSERT ((d)->alloc >= (d)->size + 1); \
3989 alloc = (d)->alloc; \
3990 newsize = (d)->size + (n); \
3991 if (alloc <= newsize) \
3992 { \
3993 newalloc = 2*newsize; \
3994 (d)->alloc = newalloc; \
3995 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
3996 alloc, newalloc, char); \
3997 } \
3998 } while (0)
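#if 0
/* Illustrative sketch only (not part of the library; "n" is hypothetical and
   error handling is omitted): grow the buffer before appending n bytes, then
   account for them in "size".  __gmp_asprintf_final is presumably what
   terminates the string and hands the block back through "result".  */
  struct gmp_asprintf_t d;
  char *result;
  GMP_ASPRINTF_T_INIT (d, &result);
  GMP_ASPRINTF_T_NEED (&d, n);
  /* ... copy n bytes to d.buf + d.size ... */
  d.size += n;
  __gmp_asprintf_final (&d);
#endif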
4000 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
4001 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
4002 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
4004 /* buf is where to write the next output, and size is how much space is left
4005 there. If the application passed size==0 then that's what we'll have
4006 here, and nothing at all should be written. */
4007 struct gmp_snprintf_t {
4008 char *buf;
4009 size_t size;
4010 };
4012 /* Add the bytes printed by the call to the total retval, or bail out on an
4013 error. */
4014 #define DOPRNT_ACCUMULATE(call) \
4015 do { \
4016 int __ret; \
4017 __ret = call; \
4018 if (__ret == -1) \
4019 goto error; \
4020 retval += __ret; \
4021 } while (0)
4022 #define DOPRNT_ACCUMULATE_FUN(fun, params) \
4023 do { \
4024 ASSERT ((fun) != NULL); \
4025 DOPRNT_ACCUMULATE ((*(fun)) params); \
4026 } while (0)
4028 #define DOPRNT_FORMAT(fmt, ap) \
4029 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4030 #define DOPRNT_MEMORY(ptr, len) \
4031 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4032 #define DOPRNT_REPS(c, n) \
4033 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4035 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
4037 #define DOPRNT_REPS_MAYBE(c, n) \
4038 do { \
4039 if ((n) != 0) \
4040 DOPRNT_REPS (c, n); \
4041 } while (0)
4042 #define DOPRNT_MEMORY_MAYBE(ptr, len) \
4043 do { \
4044 if ((len) != 0) \
4045 DOPRNT_MEMORY (ptr, len); \
4046 } while (0)
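#if 0
/* Illustrative sketch only: these macros assume the surrounding function
   (e.g. __gmp_doprnt) supplies "funs", "data", a running "retval" and an
   "error" label, roughly like this ("pad" is a hypothetical fill count).  */
  int retval = 0;
  DOPRNT_STRING ("0x");
  DOPRNT_REPS_MAYBE ('0', pad);
  return retval;
 error:
  return -1;
#endif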
4048 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
4049 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
4051 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4052 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
4054 __GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
4055 #endif /* _GMP_H_HAVE_VA_LIST */
4058 typedef int (*gmp_doscan_scan_t) __GMP_PROTO ((void *, const char *, ...));
4059 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
4060 typedef int (*gmp_doscan_get_t) __GMP_PROTO ((void *));
4061 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
4063 struct gmp_doscan_funs_t {
4064 gmp_doscan_scan_t scan;
4065 gmp_doscan_step_t step;
4066 gmp_doscan_get_t get;
4067 gmp_doscan_unget_t unget;
4068 };
4069 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
4070 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
4072 #if _GMP_H_HAVE_VA_LIST
4073 __GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
4074 #endif
4077 /* For testing and debugging. */
4078 #define MPZ_CHECK_FORMAT(z) \
4079 do { \
4080 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
4081 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
4082 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
4083 } while (0)
4085 #define MPQ_CHECK_FORMAT(q) \
4086 do { \
4087 MPZ_CHECK_FORMAT (mpq_numref (q)); \
4088 MPZ_CHECK_FORMAT (mpq_denref (q)); \
4089 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
4091 if (SIZ(mpq_numref(q)) == 0) \
4092 { \
4093 /* should have zero as 0/1 */ \
4094 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
4095 && PTR(mpq_denref(q))[0] == 1); \
4096 } \
4097 else \
4098 { \
4099 /* should have no common factors */ \
4100 mpz_t g; \
4101 mpz_init (g); \
4102 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
4103 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
4104 mpz_clear (g); \
4105 } \
4106 } while (0)
4108 #define MPF_CHECK_FORMAT(f) \
4109 do { \
4110 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
4111 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
4112 if (SIZ(f) == 0) \
4113 ASSERT_ALWAYS (EXP(f) == 0); \
4114 if (SIZ(f) != 0) \
4115 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
4116 } while (0)
4119 #define MPZ_PROVOKE_REALLOC(z) \
4120 do { ALLOC(z) = ABSIZ(z); } while (0)
4123 /* Enhancement: The "mod" and "gcd_1" functions below could have
4124 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
4125 function pointers, only actual functions. It probably doesn't make much
4126 difference to the gmp code, since hopefully we arrange calls so there's
4127 no great need for the compiler to move things around. */
4129 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4130 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4131 in mpn/x86/x86-defs.m4. Be sure to update that when changing here. */
4132 struct cpuvec_t {
4133 DECL_add_n ((*add_n));
4134 DECL_addmul_1 ((*addmul_1));
4135 DECL_copyd ((*copyd));
4136 DECL_copyi ((*copyi));
4137 DECL_divexact_1 ((*divexact_1));
4138 DECL_divexact_by3c ((*divexact_by3c));
4139 DECL_divrem_1 ((*divrem_1));
4140 DECL_gcd_1 ((*gcd_1));
4141 DECL_lshift ((*lshift));
4142 DECL_mod_1 ((*mod_1));
4143 DECL_mod_34lsub1 ((*mod_34lsub1));
4144 DECL_modexact_1c_odd ((*modexact_1c_odd));
4145 DECL_mul_1 ((*mul_1));
4146 DECL_mul_basecase ((*mul_basecase));
4147 DECL_preinv_divrem_1 ((*preinv_divrem_1));
4148 DECL_preinv_mod_1 ((*preinv_mod_1));
4149 DECL_rshift ((*rshift));
4150 DECL_sqr_basecase ((*sqr_basecase));
4151 DECL_sub_n ((*sub_n));
4152 DECL_submul_1 ((*submul_1));
4153 int initialized;
4154 mp_size_t mul_toom22_threshold;
4155 mp_size_t mul_toom33_threshold;
4156 mp_size_t sqr_toom2_threshold;
4157 mp_size_t sqr_toom3_threshold;
4158 };
4159 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4160 #endif /* x86 fat binary */
4162 __GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
4164 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4165 if that hasn't yet been done (to establish the right values). */
4166 #define CPUVEC_THRESHOLD(field) \
4167 ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
4168 __gmpn_cpuvec.field)
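/* For instance (illustrative only), a tuned fat-binary threshold would be
   read as  CPUVEC_THRESHOLD (mul_toom22_threshold),  which runs
   __gmpn_cpuvec_init once if necessary and then yields
   __gmpn_cpuvec.mul_toom22_threshold.  */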
4171 #if HAVE_NATIVE_mpn_add_nc
4172 #define mpn_add_nc __MPN(add_nc)
4173 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4174 #else
4175 static inline
4176 mp_limb_t
4177 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4178 {
4179 mp_limb_t co;
4180 co = mpn_add_n (rp, up, vp, n);
4181 co += mpn_add_1 (rp, rp, n, ci);
4182 return co;
4183 }
4184 #endif
4186 #if HAVE_NATIVE_mpn_sub_nc
4187 #define mpn_sub_nc __MPN(sub_nc)
4188 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4189 #else
4190 static inline mp_limb_t
4191 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4192 {
4193 mp_limb_t co;
4194 co = mpn_sub_n (rp, up, vp, n);
4195 co += mpn_sub_1 (rp, rp, n, ci);
4196 return co;
4197 }
4198 #endif
4200 static inline int
4201 mpn_zero_p (mp_srcptr ap, mp_size_t n)
4202 {
4203 mp_size_t i;
4204 for (i = n - 1; i >= 0; i--)
4205 {
4206 if (ap[i] != 0)
4207 return 0;
4208 }
4209 return 1;
4210 }
4212 #if TUNE_PROGRAM_BUILD
4213 /* Some extras wanted when recompiling some .c files for use by the tune
4214 program. Not part of a normal build.
4216 It's necessary to keep these thresholds as #defines (just to an
4217 identically named variable), since various defaults are established based
4218 on #ifdef in the .c files. For some this is not so (the defaults are
4219 instead established above), but all are done this way for consistency. */
4221 #undef MUL_TOOM22_THRESHOLD
4222 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold
4223 extern mp_size_t mul_toom22_threshold;
4225 #undef MUL_TOOM33_THRESHOLD
4226 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold
4227 extern mp_size_t mul_toom33_threshold;
4229 #undef MUL_TOOM44_THRESHOLD
4230 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold
4231 extern mp_size_t mul_toom44_threshold;
4233 #undef MUL_TOOM6H_THRESHOLD
4234 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
4235 extern mp_size_t mul_toom6h_threshold;
4237 #undef MUL_TOOM8H_THRESHOLD
4238 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
4239 extern mp_size_t mul_toom8h_threshold;
4241 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD
4242 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
4243 extern mp_size_t mul_toom32_to_toom43_threshold;
4245 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD
4246 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
4247 extern mp_size_t mul_toom32_to_toom53_threshold;
4249 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD
4250 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
4251 extern mp_size_t mul_toom42_to_toom53_threshold;
4253 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD
4254 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
4255 extern mp_size_t mul_toom42_to_toom63_threshold;
4257 #undef MUL_FFT_THRESHOLD
4258 #define MUL_FFT_THRESHOLD mul_fft_threshold
4259 extern mp_size_t mul_fft_threshold;
4261 #undef MUL_FFT_MODF_THRESHOLD
4262 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
4263 extern mp_size_t mul_fft_modf_threshold;
4265 #undef MUL_FFT_TABLE
4266 #define MUL_FFT_TABLE { 0 }
4268 #undef MUL_FFT_TABLE3
4269 #define MUL_FFT_TABLE3 { {0,0} }
4271 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4272 remain as zero (always use it). */
4273 #if ! HAVE_NATIVE_mpn_sqr_basecase
4274 #undef SQR_BASECASE_THRESHOLD
4275 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
4276 extern mp_size_t sqr_basecase_threshold;
4277 #endif
4279 #if TUNE_PROGRAM_BUILD_SQR
4280 #undef SQR_TOOM2_THRESHOLD
4281 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
4282 #else
4283 #undef SQR_TOOM2_THRESHOLD
4284 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
4285 extern mp_size_t sqr_toom2_threshold;
4286 #endif
4288 #undef SQR_TOOM3_THRESHOLD
4289 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
4290 extern mp_size_t sqr_toom3_threshold;
4292 #undef SQR_TOOM4_THRESHOLD
4293 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
4294 extern mp_size_t sqr_toom4_threshold;
4296 #undef SQR_TOOM6_THRESHOLD
4297 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
4298 extern mp_size_t sqr_toom6_threshold;
4300 #undef SQR_TOOM8_THRESHOLD
4301 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
4302 extern mp_size_t sqr_toom8_threshold;
4304 #undef SQR_FFT_THRESHOLD
4305 #define SQR_FFT_THRESHOLD sqr_fft_threshold
4306 extern mp_size_t sqr_fft_threshold;
4308 #undef SQR_FFT_MODF_THRESHOLD
4309 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
4310 extern mp_size_t sqr_fft_modf_threshold;
4312 #undef SQR_FFT_TABLE
4313 #define SQR_FFT_TABLE { 0 }
4315 #undef SQR_FFT_TABLE3
4316 #define SQR_FFT_TABLE3 { {0,0} }
4318 #undef MULLO_BASECASE_THRESHOLD
4319 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
4320 extern mp_size_t mullo_basecase_threshold;
4322 #undef MULLO_DC_THRESHOLD
4323 #define MULLO_DC_THRESHOLD mullo_dc_threshold
4324 extern mp_size_t mullo_dc_threshold;
4326 #undef MULLO_MUL_N_THRESHOLD
4327 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
4328 extern mp_size_t mullo_mul_n_threshold;
4330 #undef DC_DIV_QR_THRESHOLD
4331 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
4332 extern mp_size_t dc_div_qr_threshold;
4334 #undef DC_DIVAPPR_Q_THRESHOLD
4335 #define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
4336 extern mp_size_t dc_divappr_q_threshold;
4338 #undef DC_BDIV_Q_THRESHOLD
4339 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
4340 extern mp_size_t dc_bdiv_q_threshold;
4342 #undef DC_BDIV_QR_THRESHOLD
4343 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
4344 extern mp_size_t dc_bdiv_qr_threshold;
4346 #undef MU_DIV_QR_THRESHOLD
4347 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
4348 extern mp_size_t mu_div_qr_threshold;
4350 #undef MU_DIVAPPR_Q_THRESHOLD
4351 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
4352 extern mp_size_t mu_divappr_q_threshold;
4354 #undef MUPI_DIV_QR_THRESHOLD
4355 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
4356 extern mp_size_t mupi_div_qr_threshold;
4358 #undef MU_BDIV_QR_THRESHOLD
4359 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
4360 extern mp_size_t mu_bdiv_qr_threshold;
4362 #undef MU_BDIV_Q_THRESHOLD
4363 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
4364 extern mp_size_t mu_bdiv_q_threshold;
4366 #undef INV_MULMOD_BNM1_THRESHOLD
4367 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
4368 extern mp_size_t inv_mulmod_bnm1_threshold;
4370 #undef INV_NEWTON_THRESHOLD
4371 #define INV_NEWTON_THRESHOLD inv_newton_threshold
4372 extern mp_size_t inv_newton_threshold;
4374 #undef INV_APPR_THRESHOLD
4375 #define INV_APPR_THRESHOLD inv_appr_threshold
4376 extern mp_size_t inv_appr_threshold;
4378 #undef BINV_NEWTON_THRESHOLD
4379 #define BINV_NEWTON_THRESHOLD binv_newton_threshold
4380 extern mp_size_t binv_newton_threshold;
4382 #undef REDC_1_TO_REDC_2_THRESHOLD
4383 #define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
4384 extern mp_size_t redc_1_to_redc_2_threshold;
4386 #undef REDC_2_TO_REDC_N_THRESHOLD
4387 #define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
4388 extern mp_size_t redc_2_to_redc_n_threshold;
4390 #undef REDC_1_TO_REDC_N_THRESHOLD
4391 #define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
4392 extern mp_size_t redc_1_to_redc_n_threshold;
4394 #undef MATRIX22_STRASSEN_THRESHOLD
4395 #define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
4396 extern mp_size_t matrix22_strassen_threshold;
4398 #undef HGCD_THRESHOLD
4399 #define HGCD_THRESHOLD hgcd_threshold
4400 extern mp_size_t hgcd_threshold;
4402 #undef GCD_DC_THRESHOLD
4403 #define GCD_DC_THRESHOLD gcd_dc_threshold
4404 extern mp_size_t gcd_dc_threshold;
4406 #undef GCDEXT_DC_THRESHOLD
4407 #define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
4408 extern mp_size_t gcdext_dc_threshold;
4410 #undef DIVREM_1_NORM_THRESHOLD
4411 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
4412 extern mp_size_t divrem_1_norm_threshold;
4414 #undef DIVREM_1_UNNORM_THRESHOLD
4415 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
4416 extern mp_size_t divrem_1_unnorm_threshold;
4418 #undef MOD_1_NORM_THRESHOLD
4419 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
4420 extern mp_size_t mod_1_norm_threshold;
4422 #undef MOD_1_UNNORM_THRESHOLD
4423 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
4424 extern mp_size_t mod_1_unnorm_threshold;
4426 #undef MOD_1N_TO_MOD_1_1_THRESHOLD
4427 #define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
4428 extern mp_size_t mod_1n_to_mod_1_1_threshold;
4430 #undef MOD_1U_TO_MOD_1_1_THRESHOLD
4431 #define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
4432 extern mp_size_t mod_1u_to_mod_1_1_threshold;
4434 #undef MOD_1_1_TO_MOD_1_2_THRESHOLD
4435 #define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
4436 extern mp_size_t mod_1_1_to_mod_1_2_threshold;
4438 #undef MOD_1_2_TO_MOD_1_4_THRESHOLD
4439 #define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
4440 extern mp_size_t mod_1_2_to_mod_1_4_threshold;
4442 #undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
4443 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
4444 extern mp_size_t preinv_mod_1_to_mod_1_threshold;
4446 #if ! UDIV_PREINV_ALWAYS
4447 #undef DIVREM_2_THRESHOLD
4448 #define DIVREM_2_THRESHOLD divrem_2_threshold
4449 extern mp_size_t divrem_2_threshold;
4450 #endif
4452 #undef MULMOD_BNM1_THRESHOLD
4453 #define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
4454 extern mp_size_t mulmod_bnm1_threshold;
4456 #undef SQRMOD_BNM1_THRESHOLD
4457 #define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
4458 extern mp_size_t sqrmod_bnm1_threshold;
4460 #undef GET_STR_DC_THRESHOLD
4461 #define GET_STR_DC_THRESHOLD get_str_dc_threshold
4462 extern mp_size_t get_str_dc_threshold;
4464 #undef GET_STR_PRECOMPUTE_THRESHOLD
4465 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
4466 extern mp_size_t get_str_precompute_threshold;
4468 #undef SET_STR_DC_THRESHOLD
4469 #define SET_STR_DC_THRESHOLD set_str_dc_threshold
4470 extern mp_size_t set_str_dc_threshold;
4472 #undef SET_STR_PRECOMPUTE_THRESHOLD
4473 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
4474 extern mp_size_t set_str_precompute_threshold;
4476 #undef FFT_TABLE_ATTRS
4477 #define FFT_TABLE_ATTRS
4478 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
4479 #define FFT_TABLE3_SIZE 2000 /* generous space for tuning */
4480 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
4482 /* Sizes the tune program tests up to, used in a couple of recompilations. */
4483 #undef MUL_TOOM22_THRESHOLD_LIMIT
4484 #undef MUL_TOOM33_THRESHOLD_LIMIT
4485 #undef MULLO_BASECASE_THRESHOLD_LIMIT
4486 #undef SQR_TOOM3_THRESHOLD_LIMIT
4487 #define SQR_TOOM2_MAX_GENERIC 200
4488 #define MUL_TOOM22_THRESHOLD_LIMIT 700
4489 #define MUL_TOOM33_THRESHOLD_LIMIT 700
4490 #define SQR_TOOM3_THRESHOLD_LIMIT 400
4491 #define MUL_TOOM44_THRESHOLD_LIMIT 1000
4492 #define SQR_TOOM4_THRESHOLD_LIMIT 1000
4493 #define MUL_TOOM6H_THRESHOLD_LIMIT 1100
4494 #define SQR_TOOM6_THRESHOLD_LIMIT 1100
4495 #define MUL_TOOM8H_THRESHOLD_LIMIT 1200
4496 #define SQR_TOOM8_THRESHOLD_LIMIT 1200
4497 #define MULLO_BASECASE_THRESHOLD_LIMIT 200
4498 #define GET_STR_THRESHOLD_LIMIT 150
4500 #endif /* TUNE_PROGRAM_BUILD */
4502 #if defined (__cplusplus)
4503 }
4504 #endif
4506 /* FIXME: Make these itch functions less conservative. Also consider making
4507 them dependent on just 'an', and compute the allocation directly from 'an'
4508 instead of via n. */
4510 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
4511 k is the smallest k such that
4512 ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
4513 which implies that
4514 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
4515 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
4516 */
4517 #define mpn_toom22_mul_itch(an, bn) \
4518 (2 * ((an) + GMP_NUMB_BITS))
4519 #define mpn_toom2_sqr_itch(an) \
4520 (2 * ((an) + GMP_NUMB_BITS))
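/* Worked example of that bound (assuming MUL_TOOM22_THRESHOLD == 30,
   GMP_NUMB_BITS == 64, an == 1000): floor(999/29) = 34, so
   k = 1 + floor(log_2(34)) = 6 and the exact need is 2*(1000 + 6) = 2012
   limbs, safely below the 2*(an + GMP_NUMB_BITS) = 2128 limbs above.  */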
4522 /* Can probably be trimmed to 2 an + O(log an). */
4523 #define mpn_toom33_mul_itch(an, bn) \
4524 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4525 #define mpn_toom3_sqr_itch(an) \
4526 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4528 #define mpn_toom44_mul_itch(an, bn) \
4529 (3 * (an) + GMP_NUMB_BITS)
4530 #define mpn_toom4_sqr_itch(an) \
4531 (3 * (an) + GMP_NUMB_BITS)
4533 #define mpn_toom6_sqr_itch(n) \
4534 ( ((n) - SQR_TOOM6_THRESHOLD)*2 + \
4535 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4536 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
4538 #define mpn_toom6_mul_n_itch(n) \
4539 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
4540 MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4541 mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
4543 static inline mp_size_t
4544 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
4545 mp_size_t estimatedN;
4546 estimatedN = (an + bn) / (size_t) 10 + 1;
4547 return mpn_toom6_mul_n_itch (estimatedN * 6);
4548 }
4550 #define mpn_toom8_sqr_itch(n) \
4551 ( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
4552 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4553 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
4555 #define mpn_toom8_mul_n_itch(n) \
4556 ( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \
4557 MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4558 mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
4560 static inline mp_size_t
4561 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
4562 mp_size_t estimatedN;
4563 estimatedN = (an + bn) / (size_t) 14 + 1;
4564 return mpn_toom8_mul_n_itch (estimatedN * 8);
4565 }
4567 static inline mp_size_t
4568 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
4569 {
4570 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
4571 mp_size_t itch = 2 * n + 1;
4573 return itch;
4574 }
4576 static inline mp_size_t
4577 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
4578 {
4579 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
4580 return 6 * n + 3;
4581 }
4583 static inline mp_size_t
4584 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
4585 {
4586 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
4588 return 6*n + 4;
4589 }
4591 static inline mp_size_t
4592 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
4593 {
4594 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
4595 return 6*n + 4;
4596 }
4598 static inline mp_size_t
4599 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
4600 {
4601 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
4602 return 10 * n + 10;
4603 }
4605 static inline mp_size_t
4606 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
4607 {
4608 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
4609 return 10 * n + 10;
4610 }
4612 static inline mp_size_t
4613 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
4614 {
4615 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
4616 return 9 * n + 3;
4617 }
4619 #if 0
4620 #define mpn_fft_mul mpn_mul_fft_full
4621 #else
4622 #define mpn_fft_mul mpn_nussbaumer_mul
4623 #endif
4625 #ifdef __cplusplus
4627 /* A little helper for a null-terminated __gmp_allocate_func string.
4628 The destructor ensures it's freed even if an exception is thrown.
4629 The len field is needed by the destructor, and can be used by anyone else
4630 to avoid a second strlen pass over the data.
4632 Since our input is a C string, using strlen is correct. Perhaps it'd be
4633 more C++-ish style to use std::char_traits<char>::length, but char_traits
4634 isn't available in gcc 2.95.4. */
4636 class gmp_allocated_string {
4637 public:
4638 char *str;
4639 size_t len;
4640 gmp_allocated_string(char *arg)
4641 {
4642 str = arg;
4643 len = std::strlen (str);
4644 }
4645 ~gmp_allocated_string()
4646 {
4647 (*__gmp_free_func) (str, len+1);
4648 }
4649 };
4651 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
4652 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
4653 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
4654 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
4655 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
4656 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
4658 #endif /* __cplusplus */
4660 #endif /* __GMP_IMPL_H__ */