df: add --output=file to directly output specified arguments
[coreutils.git] / src / factor.c
blob257a7ed90e1147a580d4ea8ca13da0b98be3562a
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synched 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or the SQUFOF algorithm, checking status of found factors
54 again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 #include <config.h>
88 #include <getopt.h>
89 #include <stdio.h>
90 #if HAVE_GMP
91 # include <gmp.h>
92 # if !HAVE_DECL_MPZ_INITS
93 # include <stdarg.h>
94 # endif
95 #endif
97 #include <assert.h>
99 #include "system.h"
100 #include "error.h"
101 #include "quote.h"
102 #include "readtokens.h"
103 #include "xstrtol.h"
105 /* The official name of this program (e.g., no 'g' prefix). */
106 #define PROGRAM_NAME "factor"
108 #define AUTHORS \
109 proper_name ("Paul Rubin"), \
110 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
111 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
113 /* Token delimiters when reading from a file. */
114 #define DELIM "\n\t "
116 #ifndef STAT_SQUFOF
117 # define STAT_SQUFOF 0
118 #endif
120 #ifndef USE_LONGLONG_H
121 /* With the way we use longlong.h, it's only safe to use
122 when UWtype = UHWtype, as there were various cases
123 (as can be seen in the history for longlong.h) where
124 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
125 to avoid compile time or run time issues. */
126 # if LONG_MAX == INTMAX_MAX
127 # define USE_LONGLONG_H 1
128 # endif
129 #endif
131 #if USE_LONGLONG_H
133 /* Make definitions for longlong.h to make it do what it can do for us */
135 /* bitcount for uintmax_t */
136 # if UINTMAX_MAX == UINT32_MAX
137 # define W_TYPE_SIZE 32
138 # elif UINTMAX_MAX == UINT64_MAX
139 # define W_TYPE_SIZE 64
140 # elif UINTMAX_MAX == UINT128_MAX
141 # define W_TYPE_SIZE 128
142 # endif
144 # define UWtype uintmax_t
145 # define UHWtype unsigned long int
146 # undef UDWtype
147 # if HAVE_ATTRIBUTE_MODE
148 typedef unsigned int UQItype __attribute__ ((mode (QI)));
149 typedef int SItype __attribute__ ((mode (SI)));
150 typedef unsigned int USItype __attribute__ ((mode (SI)));
151 typedef int DItype __attribute__ ((mode (DI)));
152 typedef unsigned int UDItype __attribute__ ((mode (DI)));
153 # else
154 typedef unsigned char UQItype;
155 typedef long SItype;
156 typedef unsigned long int USItype;
157 # if HAVE_LONG_LONG_INT
158 typedef long long int DItype;
159 typedef unsigned long long int UDItype;
160 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
161 typedef long int DItype;
162 typedef unsigned long int UDItype;
163 # endif
164 # endif
165 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
166 # define ASSERT(x) /* FIXME make longlong.h really standalone */
167 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
168 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
169 # ifndef __GMP_GNUC_PREREQ
170 # define __GMP_GNUC_PREREQ(a,b) 1
171 # endif
173 /* These stub macros are only used in longlong.h in certain system compiler
174 combinations, so ensure usage to avoid -Wunused-macros warnings. */
175 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
176 ASSERT (1)
177 __GMP_DECLSPEC
178 # endif
180 # if _ARCH_PPC
181 # define HAVE_HOST_CPU_FAMILY_powerpc 1
182 # endif
183 # include "longlong.h"
184 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
185 const unsigned char factor_clz_tab[129] =
187 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
188 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
189 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
190 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
193 # endif
195 #else /* not USE_LONGLONG_H */
197 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
198 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
199 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
200 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
202 #endif
204 #if !defined __clz_tab && !defined UHWtype
205 /* Without this seemingly useless conditional, gcc -Wunused-macros
206 warns that each of the two tested macros is unused on Fedora 18.
207 FIXME: this is just an ugly band-aid. Fix it properly. */
208 #endif
210 enum alg_type { ALG_POLLARD_RHO = 1, ALG_SQUFOF = 2 };
212 static enum alg_type alg;
214 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
215 #define MAX_NFACTS 26
217 enum
219 DEV_DEBUG_OPTION = CHAR_MAX + 1
222 static struct option const long_options[] =
224 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
225 {GETOPT_HELP_OPTION_DECL},
226 {GETOPT_VERSION_OPTION_DECL},
227 {NULL, 0, NULL, 0}
230 struct factors
232 uintmax_t plarge[2]; /* Can have a single large factor */
233 uintmax_t p[MAX_NFACTS];
234 unsigned char e[MAX_NFACTS];
235 unsigned char nfactors;
238 #if HAVE_GMP
239 struct mp_factors
241 mpz_t *p;
242 unsigned long int *e;
243 unsigned long int nfactors;
245 #endif
247 static void factor (uintmax_t, uintmax_t, struct factors *);
249 #ifndef umul_ppmm
250 # define umul_ppmm(w1, w0, u, v) \
251 do { \
252 uintmax_t __x0, __x1, __x2, __x3; \
253 unsigned long int __ul, __vl, __uh, __vh; \
254 uintmax_t __u = (u), __v = (v); \
256 __ul = __ll_lowpart (__u); \
257 __uh = __ll_highpart (__u); \
258 __vl = __ll_lowpart (__v); \
259 __vh = __ll_highpart (__v); \
261 __x0 = (uintmax_t) __ul * __vl; \
262 __x1 = (uintmax_t) __ul * __vh; \
263 __x2 = (uintmax_t) __uh * __vl; \
264 __x3 = (uintmax_t) __uh * __vh; \
266 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
267 __x1 += __x2; /* but this indeed can */ \
268 if (__x1 < __x2) /* did we get it? */ \
269 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
271 (w1) = __x3 + __ll_highpart (__x1); \
272 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
273 } while (0)
274 #endif
276 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
277 /* Define our own, not needing normalization. This function is
278 currently not performance critical, so keep it simple. Similar to
279 the mod macro below. */
280 # undef udiv_qrnnd
281 # define udiv_qrnnd(q, r, n1, n0, d) \
282 do { \
283 uintmax_t __d1, __d0, __q, __r1, __r0; \
285 assert ((n1) < (d)); \
286 __d1 = (d); __d0 = 0; \
287 __r1 = (n1); __r0 = (n0); \
288 __q = 0; \
289 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
291 rsh2 (__d1, __d0, __d1, __d0, 1); \
292 __q <<= 1; \
293 if (ge2 (__r1, __r0, __d1, __d0)) \
295 __q++; \
296 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
299 (r) = __r0; \
300 (q) = __q; \
301 } while (0)
302 #endif
304 #if !defined add_ssaaaa
305 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
306 do { \
307 uintmax_t _add_x; \
308 _add_x = (al) + (bl); \
309 (sh) = (ah) + (bh) + (_add_x < (al)); \
310 (sl) = _add_x; \
311 } while (0)
312 #endif
314 #define rsh2(rh, rl, ah, al, cnt) \
315 do { \
316 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
317 (rh) = (ah) >> (cnt); \
318 } while (0)
320 #define lsh2(rh, rl, ah, al, cnt) \
321 do { \
322 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
323 (rl) = (al) << (cnt); \
324 } while (0)
326 #define ge2(ah, al, bh, bl) \
327 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
329 #define gt2(ah, al, bh, bl) \
330 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
332 #ifndef sub_ddmmss
333 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
334 do { \
335 uintmax_t _cy; \
336 _cy = (al) < (bl); \
337 (rl) = (al) - (bl); \
338 (rh) = (ah) - (bh) - _cy; \
339 } while (0)
340 #endif
342 #ifndef count_leading_zeros
343 # define count_leading_zeros(count, x) do { \
344 uintmax_t __clz_x = (x); \
345 unsigned int __clz_c; \
346 for (__clz_c = 0; \
347 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
348 __clz_c += 8) \
349 __clz_x <<= 8; \
350 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
351 __clz_x <<= 1; \
352 (count) = __clz_c; \
353 } while (0)
354 #endif
356 #ifndef count_trailing_zeros
357 # define count_trailing_zeros(count, x) do { \
358 uintmax_t __ctz_x = (x); \
359 unsigned int __ctz_c = 0; \
360 while ((__ctz_x & 1) == 0) \
362 __ctz_x >>= 1; \
363 __ctz_c++; \
365 (count) = __ctz_c; \
366 } while (0)
367 #endif
369 /* Requires that a < n and b <= n */
370 #define submod(r,a,b,n) \
371 do { \
372 uintmax_t _t = - (uintmax_t) (a < b); \
373 (r) = ((n) & _t) + (a) - (b); \
374 } while (0)
376 #define addmod(r,a,b,n) \
377 submod ((r), (a), ((n) - (b)), (n))
379 /* Modular two-word addition and subtraction. For performance reasons, the
380 most significant bit of n1 must be clear. The destination variables must be
381 distinct from the mod operand. */
382 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
383 do { \
384 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
385 if (ge2 ((r1), (r0), (n1), (n0))) \
386 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
387 } while (0)
388 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
389 do { \
390 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
391 if ((intmax_t) (r1) < 0) \
392 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
393 } while (0)
395 #define HIGHBIT_TO_MASK(x) \
396 (((intmax_t)-1 >> 1) < 0 \
397 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
398 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
399 ? UINTMAX_MAX : (uintmax_t) 0))
401 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
402 Requires that d1 != 0. */
403 static uintmax_t
404 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
406 int cntd, cnta;
408 assert (d1 != 0);
410 if (a1 == 0)
412 *r1 = 0;
413 return a0;
416 count_leading_zeros (cntd, d1);
417 count_leading_zeros (cnta, a1);
418 int cnt = cntd - cnta;
419 lsh2 (d1, d0, d1, d0, cnt);
420 for (int i = 0; i < cnt; i++)
422 if (ge2 (a1, a0, d1, d0))
423 sub_ddmmss (a1, a0, a1, a0, d1, d0);
424 rsh2 (d1, d0, d1, d0, 1);
427 *r1 = a1;
428 return a0;
431 static uintmax_t _GL_ATTRIBUTE_CONST
432 gcd_odd (uintmax_t a, uintmax_t b)
434 if ( (b & 1) == 0)
436 uintmax_t t = b;
437 b = a;
438 a = t;
440 if (a == 0)
441 return b;
443 /* Take out least significant one bit, to make room for sign */
444 b >>= 1;
446 for (;;)
448 uintmax_t t;
449 uintmax_t bgta;
451 while ((a & 1) == 0)
452 a >>= 1;
453 a >>= 1;
455 t = a - b;
456 if (t == 0)
457 return (a << 1) + 1;
459 bgta = HIGHBIT_TO_MASK (t);
461 /* b <-- min (a, b) */
462 b += (bgta & t);
464 /* a <-- |a - b| */
465 a = (t ^ bgta) - bgta;
469 static uintmax_t
470 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
472 while ((a0 & 1) == 0)
473 rsh2 (a1, a0, a1, a0, 1);
474 while ((b0 & 1) == 0)
475 rsh2 (b1, b0, b1, b0, 1);
477 for (;;)
479 if ((b1 | a1) == 0)
481 *r1 = 0;
482 return gcd_odd (b0, a0);
485 if (gt2 (a1, a0, b1, b0))
487 sub_ddmmss (a1, a0, a1, a0, b1, b0);
489 rsh2 (a1, a0, a1, a0, 1);
490 while ((a0 & 1) == 0);
492 else if (gt2 (b1, b0, a1, a0))
494 sub_ddmmss (b1, b0, b1, b0, a1, a0);
496 rsh2 (b1, b0, b1, b0, 1);
497 while ((b0 & 1) == 0);
499 else
500 break;
503 *r1 = a1;
504 return a0;
507 static void
508 factor_insert_multiplicity (struct factors *factors,
509 uintmax_t prime, unsigned int m)
511 unsigned int nfactors = factors->nfactors;
512 uintmax_t *p = factors->p;
513 unsigned char *e = factors->e;
515 /* Locate position for insert new or increment e. */
516 int i;
517 for (i = nfactors - 1; i >= 0; i--)
519 if (p[i] <= prime)
520 break;
523 if (i < 0 || p[i] != prime)
525 for (int j = nfactors - 1; j > i; j--)
527 p[j + 1] = p[j];
528 e[j + 1] = e[j];
530 p[i + 1] = prime;
531 e[i + 1] = m;
532 factors->nfactors = nfactors + 1;
534 else
536 e[i] += m;
540 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
542 static void
543 factor_insert_large (struct factors *factors,
544 uintmax_t p1, uintmax_t p0)
546 if (p1 > 0)
548 assert (factors->plarge[1] == 0);
549 factors->plarge[0] = p0;
550 factors->plarge[1] = p1;
552 else
553 factor_insert (factors, p0);
556 #if HAVE_GMP
558 # if !HAVE_DECL_MPZ_INITS
560 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
561 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
563 static void
564 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
566 va_list ap;
568 va_start (ap, mpz_single_init);
570 mpz_t *mpz;
571 while ((mpz = va_arg (ap, mpz_t *)))
572 mpz_single_init (*mpz);
574 va_end (ap);
576 # endif
578 static void mp_factor (mpz_t, struct mp_factors *);
580 static void
581 mp_factor_init (struct mp_factors *factors)
583 factors->p = NULL;
584 factors->e = NULL;
585 factors->nfactors = 0;
588 static void
589 mp_factor_clear (struct mp_factors *factors)
591 for (unsigned int i = 0; i < factors->nfactors; i++)
592 mpz_clear (factors->p[i]);
594 free (factors->p);
595 free (factors->e);
598 static void
599 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
601 unsigned long int nfactors = factors->nfactors;
602 mpz_t *p = factors->p;
603 unsigned long int *e = factors->e;
604 long i;
606 /* Locate position for insert new or increment e. */
607 for (i = nfactors - 1; i >= 0; i--)
609 if (mpz_cmp (p[i], prime) <= 0)
610 break;
613 if (i < 0 || mpz_cmp (p[i], prime) != 0)
615 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
616 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
618 mpz_init (p[nfactors]);
619 for (long j = nfactors - 1; j > i; j--)
621 mpz_set (p[j + 1], p[j]);
622 e[j + 1] = e[j];
624 mpz_set (p[i + 1], prime);
625 e[i + 1] = 1;
627 factors->p = p;
628 factors->e = e;
629 factors->nfactors = nfactors + 1;
631 else
633 e[i] += 1;
637 static void
638 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
640 mpz_t pz;
642 mpz_init_set_ui (pz, prime);
643 mp_factor_insert (factors, pz);
644 mpz_clear (pz);
646 #endif /* HAVE_GMP */
649 /* Number of bits in an uintmax_t. */
650 enum { W = sizeof (uintmax_t) * CHAR_BIT };
652 /* Verify that uintmax_t does not have holes in its representation. */
653 verify (UINTMAX_MAX >> (W - 1) == 1);
655 #define P(a,b,c,d) a,
656 static const unsigned char primes_diff[] = {
657 #include "primes.h"
658 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
660 #undef P
662 #define PRIMES_PTAB_ENTRIES \
663 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
665 #define P(a,b,c,d) b,
666 static const unsigned char primes_diff8[] = {
667 #include "primes.h"
668 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
670 #undef P
672 struct primes_dtab
674 uintmax_t binv, lim;
677 #define P(a,b,c,d) {c,d},
678 static const struct primes_dtab primes_dtab[] = {
679 #include "primes.h"
680 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
682 #undef P
684 /* Verify that uintmax_t is not wider than
685 the integers used to generate primes.h. */
686 verify (W <= WIDE_UINT_BITS);
688 /* debugging for developers. Enables devmsg().
689 This flag is used only in the GMP code. */
690 static bool dev_debug = false;
692 /* Like error(0, 0, ...), but without an implicit newline.
693 Also a noop unless the global DEV_DEBUG is set.
694 TODO: Replace with variadic macro in system.h or
695 move to a separate module. */
696 static inline void
697 devmsg (char const *fmt, ...)
699 if (dev_debug)
701 va_list ap;
702 va_start (ap, fmt);
703 vfprintf (stderr, fmt, ap);
704 va_end (ap);
708 /* Prove primality or run probabilistic tests. */
709 static bool flag_prove_primality = true;
711 /* Number of Miller-Rabin tests to run when not proving primality. */
712 #define MR_REPS 25
714 #ifdef __GNUC__
715 # define LIKELY(cond) __builtin_expect ((cond), 1)
716 # define UNLIKELY(cond) __builtin_expect ((cond), 0)
717 #else
718 # define LIKELY(cond) (cond)
719 # define UNLIKELY(cond) (cond)
720 #endif
722 static void
723 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
724 unsigned int off)
726 for (unsigned int j = 0; j < off; j++)
727 p += primes_diff[i + j];
728 factor_insert (factors, p);
731 /* Trial division with odd primes uses the following trick.
733 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
734 consider the case t < B (this is the second loop below).
736 From our tables we get
738 binv = p^{-1} (mod B)
739 lim = floor ( (B-1) / p ).
741 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
742 (and all quotients in this range occur for some t).
744 Then t = q * p is true also (mod B), and p is invertible we get
746 q = t * binv (mod B).
748 Next, assume that t is *not* divisible by p. Since multiplication
749 by binv (mod B) is a one-to-one mapping,
751 t * binv (mod B) > lim,
753 because all the smaller values are already taken.
755 This can be summed up by saying that the function
757 q(t) = binv * t (mod B)
759 is a permutation of the range 0 <= t < B, with the curious property
760 that it maps the multiples of p onto the range 0 <= q <= lim, in
761 order, and the non-multiples of p onto the range lim < q < B.
764 static uintmax_t
765 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
766 struct factors *factors)
768 if (t0 % 2 == 0)
770 unsigned int cnt;
772 if (t0 == 0)
774 count_trailing_zeros (cnt, t1);
775 t0 = t1 >> cnt;
776 t1 = 0;
777 cnt += W_TYPE_SIZE;
779 else
781 count_trailing_zeros (cnt, t0);
782 rsh2 (t1, t0, t1, t0, cnt);
785 factor_insert_multiplicity (factors, 2, cnt);
788 uintmax_t p = 3;
789 unsigned int i;
790 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
792 for (;;)
794 uintmax_t q1, q0, hi, lo _GL_UNUSED;
796 q0 = t0 * primes_dtab[i].binv;
797 umul_ppmm (hi, lo, q0, p);
798 if (hi > t1)
799 break;
800 hi = t1 - hi;
801 q1 = hi * primes_dtab[i].binv;
802 if (LIKELY (q1 > primes_dtab[i].lim))
803 break;
804 t1 = q1; t0 = q0;
805 factor_insert (factors, p);
807 p += primes_diff[i + 1];
809 if (t1p)
810 *t1p = t1;
812 #define DIVBLOCK(I) \
813 do { \
814 for (;;) \
816 q = t0 * pd[I].binv; \
817 if (LIKELY (q > pd[I].lim)) \
818 break; \
819 t0 = q; \
820 factor_insert_refind (factors, p, i + 1, I); \
822 } while (0)
824 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
826 uintmax_t q;
827 const struct primes_dtab *pd = &primes_dtab[i];
828 DIVBLOCK (0);
829 DIVBLOCK (1);
830 DIVBLOCK (2);
831 DIVBLOCK (3);
832 DIVBLOCK (4);
833 DIVBLOCK (5);
834 DIVBLOCK (6);
835 DIVBLOCK (7);
837 p += primes_diff8[i];
838 if (p * p > t0)
839 break;
842 return t0;
845 #if HAVE_GMP
846 static void
847 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
849 mpz_t q;
850 unsigned long int p;
852 devmsg ("[trial division] ");
854 mpz_init (q);
856 p = mpz_scan1 (t, 0);
857 mpz_div_2exp (t, t, p);
858 while (p)
860 mp_factor_insert_ui (factors, 2);
861 --p;
864 p = 3;
865 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
867 if (! mpz_divisible_ui_p (t, p))
869 p += primes_diff[i++];
870 if (mpz_cmp_ui (t, p * p) < 0)
871 break;
873 else
875 mpz_tdiv_q_ui (t, t, p);
876 mp_factor_insert_ui (factors, p);
880 mpz_clear (q);
882 #endif
884 /* Entry i contains (2i+1)^(-1) mod 2^8. */
885 static const unsigned char binvert_table[128] =
887 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
888 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
889 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
890 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
891 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
892 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
893 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
894 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
895 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
896 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
897 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
898 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
899 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
900 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
901 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
902 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
905 /* Compute n^(-1) mod B, using a Newton iteration. */
906 #define binv(inv,n) \
907 do { \
908 uintmax_t __n = (n); \
909 uintmax_t __inv; \
911 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
912 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
913 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
914 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
916 if (W_TYPE_SIZE > 64) \
918 int __invbits = 64; \
919 do { \
920 __inv = 2 * __inv - __inv * __inv * __n; \
921 __invbits *= 2; \
922 } while (__invbits < W_TYPE_SIZE); \
925 (inv) = __inv; \
926 } while (0)
928 /* q = u / d, assuming d|u. */
929 #define divexact_21(q1, q0, u1, u0, d) \
930 do { \
931 uintmax_t _di, _q0; \
932 binv (_di, (d)); \
933 _q0 = (u0) * _di; \
934 if ((u1) >= (d)) \
936 uintmax_t _p1, _p0 _GL_UNUSED; \
937 umul_ppmm (_p1, _p0, _q0, d); \
938 (q1) = ((u1) - _p1) * _di; \
939 (q0) = _q0; \
941 else \
943 (q0) = _q0; \
944 (q1) = 0; \
946 } while (0)
948 /* x B (mod n). */
949 #define redcify(r_prim, r, n) \
950 do { \
951 uintmax_t _redcify_q _GL_UNUSED; \
952 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
953 } while (0)
955 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
956 #define redcify2(r1, r0, x, n1, n0) \
957 do { \
958 uintmax_t _r1, _r0, _i; \
959 if ((x) < (n1)) \
961 _r1 = (x); _r0 = 0; \
962 _i = W_TYPE_SIZE; \
964 else \
966 _r1 = 0; _r0 = (x); \
967 _i = 2*W_TYPE_SIZE; \
969 while (_i-- > 0) \
971 lsh2 (_r1, _r0, _r1, _r0, 1); \
972 if (ge2 (_r1, _r0, (n1), (n0))) \
973 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
975 (r1) = _r1; \
976 (r0) = _r0; \
977 } while (0)
979 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
980 Both a and b must be in redc form, the result will be in redc form too. */
981 static inline uintmax_t
982 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
984 uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
986 umul_ppmm (rh, rl, a, b);
987 q = rl * mi;
988 umul_ppmm (th, tl, q, m);
989 xh = rh - th;
990 if (rh < th)
991 xh += m;
993 return xh;
996 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
997 Both a and b must be in redc form, the result will be in redc form too.
998 For performance reasons, the most significant bit of m must be clear. */
999 static uintmax_t
1000 mulredc2 (uintmax_t *r1p,
1001 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
1002 uintmax_t m1, uintmax_t m0, uintmax_t mi)
1004 uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
1005 mi = -mi;
1006 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
1007 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
1008 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
1010 /* First compute a0 * <b1, b0> B^{-1}
1011 +-----+
1012 |a0 b0|
1013 +--+--+--+
1014 |a0 b1|
1015 +--+--+--+
1016 |q0 m0|
1017 +--+--+--+
1018 |q0 m1|
1019 -+--+--+--+
1020 |r1|r0| 0|
1021 +--+--+--+
1023 umul_ppmm (t1, t0, a0, b0);
1024 umul_ppmm (r1, r0, a0, b1);
1025 q = mi * t0;
1026 umul_ppmm (p1, p0, q, m0);
1027 umul_ppmm (s1, s0, q, m1);
1028 r0 += (t0 != 0); /* Carry */
1029 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1030 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1031 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1033 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1034 +-----+
1035 |a1 b0|
1036 +--+--+
1037 |r1|r0|
1038 +--+--+--+
1039 |a1 b1|
1040 +--+--+--+
1041 |q1 m0|
1042 +--+--+--+
1043 |q1 m1|
1044 -+--+--+--+
1045 |r1|r0| 0|
1046 +--+--+--+
1048 umul_ppmm (t1, t0, a1, b0);
1049 umul_ppmm (s1, s0, a1, b1);
1050 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1051 q = mi * t0;
1052 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1053 umul_ppmm (p1, p0, q, m0);
1054 umul_ppmm (s1, s0, q, m1);
1055 r0 += (t0 != 0); /* Carry */
1056 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1057 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1058 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1060 if (ge2 (r1, r0, m1, m0))
1061 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1063 *r1p = r1;
1064 return r0;
1067 static uintmax_t _GL_ATTRIBUTE_CONST
1068 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1070 uintmax_t y = one;
1072 if (e & 1)
1073 y = b;
1075 while (e != 0)
1077 b = mulredc (b, b, n, ni);
1078 e >>= 1;
1080 if (e & 1)
1081 y = mulredc (y, b, n, ni);
1084 return y;
1087 static uintmax_t
1088 powm2 (uintmax_t *r1m,
1089 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1090 uintmax_t ni, const uintmax_t *one)
1092 uintmax_t r1, r0, b1, b0, n1, n0;
1093 unsigned int i;
1094 uintmax_t e;
1096 b0 = bp[0];
1097 b1 = bp[1];
1098 n0 = np[0];
1099 n1 = np[1];
1101 r0 = one[0];
1102 r1 = one[1];
1104 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1106 if (e & 1)
1108 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1109 r1 = *r1m;
1111 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1112 b1 = *r1m;
1114 for (e = ep[1]; e > 0; e >>= 1)
1116 if (e & 1)
1118 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1119 r1 = *r1m;
1121 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1122 b1 = *r1m;
1124 *r1m = r1;
1125 return r0;
1128 static bool _GL_ATTRIBUTE_CONST
1129 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1130 unsigned int k, uintmax_t one)
1132 uintmax_t y = powm (b, q, n, ni, one);
1134 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1136 if (y == one || y == nm1)
1137 return true;
1139 for (unsigned int i = 1; i < k; i++)
1141 y = mulredc (y, y, n, ni);
1143 if (y == nm1)
1144 return true;
1145 if (y == one)
1146 return false;
1148 return false;
1151 static bool
1152 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1153 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1155 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1157 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1158 y1 = r1m;
1160 if (y0 == one[0] && y1 == one[1])
1161 return true;
1163 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1165 if (y0 == nm1_0 && y1 == nm1_1)
1166 return true;
1168 for (unsigned int i = 1; i < k; i++)
1170 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1171 y1 = r1m;
1173 if (y0 == nm1_0 && y1 == nm1_1)
1174 return true;
1175 if (y0 == one[0] && y1 == one[1])
1176 return false;
1178 return false;
1181 #if HAVE_GMP
1182 static bool
1183 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1184 mpz_srcptr q, unsigned long int k)
1186 mpz_powm (y, x, q, n);
1188 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1189 return true;
1191 for (unsigned long int i = 1; i < k; i++)
1193 mpz_powm_ui (y, y, 2, n);
1194 if (mpz_cmp (y, nm1) == 0)
1195 return true;
1196 if (mpz_cmp_ui (y, 1) == 0)
1197 return false;
1199 return false;
1201 #endif
1203 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1204 have been observed. The average seem to be about 2. */
1205 static bool
1206 prime_p (uintmax_t n)
1208 int k;
1209 bool is_prime;
1210 uintmax_t a_prim, one, ni;
1211 struct factors factors;
1213 if (n <= 1)
1214 return false;
1216 /* We have already casted out small primes. */
1217 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1218 return true;
1220 /* Precomputation for Miller-Rabin. */
1221 uintmax_t q = n - 1;
1222 for (k = 0; (q & 1) == 0; k++)
1223 q >>= 1;
1225 uintmax_t a = 2;
1226 binv (ni, n); /* ni <- 1/n mod B */
1227 redcify (one, 1, n);
1228 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1230 /* Perform a Miller-Rabin test, finds most composites quickly. */
1231 if (!millerrabin (n, ni, a_prim, q, k, one))
1232 return false;
1234 if (flag_prove_primality)
1236 /* Factor n-1 for Lucas. */
1237 factor (0, n - 1, &factors);
1240 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1241 number composite. */
1242 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1244 if (flag_prove_primality)
1246 is_prime = true;
1247 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1249 is_prime
1250 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1253 else
1255 /* After enough Miller-Rabin runs, be content. */
1256 is_prime = (r == MR_REPS - 1);
1259 if (is_prime)
1260 return true;
1262 a += primes_diff[r]; /* Establish new base. */
1264 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1265 on most processors, since it avoids udiv_qrnnd. If we go down the
1266 udiv_qrnnd_preinv path, this code should be replaced. */
1268 uintmax_t s1, s0;
1269 umul_ppmm (s1, s0, one, a);
1270 if (LIKELY (s1 == 0))
1271 a_prim = s0 % n;
1272 else
1274 uintmax_t dummy _GL_UNUSED;
1275 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1279 if (!millerrabin (n, ni, a_prim, q, k, one))
1280 return false;
1283 error (0, 0, _("Lucas prime test failure. This should not happen"));
1284 abort ();
1287 static bool
1288 prime2_p (uintmax_t n1, uintmax_t n0)
1290 uintmax_t q[2], nm1[2];
1291 uintmax_t a_prim[2];
1292 uintmax_t one[2];
1293 uintmax_t na[2];
1294 uintmax_t ni;
1295 unsigned int k;
1296 struct factors factors;
1298 if (n1 == 0)
1299 return prime_p (n0);
1301 nm1[1] = n1 - (n0 == 0);
1302 nm1[0] = n0 - 1;
1303 if (nm1[0] == 0)
1305 count_trailing_zeros (k, nm1[1]);
1307 q[0] = nm1[1] >> k;
1308 q[1] = 0;
1309 k += W_TYPE_SIZE;
1311 else
1313 count_trailing_zeros (k, nm1[0]);
1314 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1317 uintmax_t a = 2;
1318 binv (ni, n0);
1319 redcify2 (one[1], one[0], 1, n1, n0);
1320 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1322 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1323 na[0] = n0;
1324 na[1] = n1;
1326 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1327 return false;
1329 if (flag_prove_primality)
1331 /* Factor n-1 for Lucas. */
1332 factor (nm1[1], nm1[0], &factors);
1335 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1336 number composite. */
1337 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1339 bool is_prime;
1340 uintmax_t e[2], y[2];
1342 if (flag_prove_primality)
1344 is_prime = true;
1345 if (factors.plarge[1])
1347 uintmax_t pi;
1348 binv (pi, factors.plarge[0]);
1349 e[0] = pi * nm1[0];
1350 e[1] = 0;
1351 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1352 is_prime = (y[0] != one[0] || y[1] != one[1]);
1354 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1356 /* FIXME: We always have the factor 2. Do we really need to
1357 handle it here? We have done the same powering as part
1358 of millerrabin. */
1359 if (factors.p[i] == 2)
1360 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1361 else
1362 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1363 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1364 is_prime = (y[0] != one[0] || y[1] != one[1]);
1367 else
1369 /* After enough Miller-Rabin runs, be content. */
1370 is_prime = (r == MR_REPS - 1);
1373 if (is_prime)
1374 return true;
1376 a += primes_diff[r]; /* Establish new base. */
1377 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1379 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1380 return false;
1383 error (0, 0, _("Lucas prime test failure. This should not happen"));
1384 abort ();
1387 #if HAVE_GMP
1388 static bool
1389 mp_prime_p (mpz_t n)
1391 bool is_prime;
1392 mpz_t q, a, nm1, tmp;
1393 struct mp_factors factors;
1395 if (mpz_cmp_ui (n, 1) <= 0)
1396 return false;
1398 /* We have already casted out small primes. */
1399 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1400 return true;
1402 mpz_inits (q, a, nm1, tmp, NULL);
1404 /* Precomputation for Miller-Rabin. */
1405 mpz_sub_ui (nm1, n, 1);
1407 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1408 unsigned long int k = mpz_scan1 (nm1, 0);
1409 mpz_tdiv_q_2exp (q, nm1, k);
1411 mpz_set_ui (a, 2);
1413 /* Perform a Miller-Rabin test, finds most composites quickly. */
1414 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1416 is_prime = false;
1417 goto ret2;
1420 if (flag_prove_primality)
1422 /* Factor n-1 for Lucas. */
1423 mpz_set (tmp, nm1);
1424 mp_factor (tmp, &factors);
1427 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1428 number composite. */
1429 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1431 if (flag_prove_primality)
1433 is_prime = true;
1434 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1436 mpz_divexact (tmp, nm1, factors.p[i]);
1437 mpz_powm (tmp, a, tmp, n);
1438 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1441 else
1443 /* After enough Miller-Rabin runs, be content. */
1444 is_prime = (r == MR_REPS - 1);
1447 if (is_prime)
1448 goto ret1;
1450 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1452 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1454 is_prime = false;
1455 goto ret1;
1459 error (0, 0, _("Lucas prime test failure. This should not happen"));
1460 abort ();
1462 ret1:
1463 if (flag_prove_primality)
1464 mp_factor_clear (&factors);
1465 ret2:
1466 mpz_clears (q, a, nm1, tmp, NULL);
1468 return is_prime;
1470 #endif
1472 static void
1473 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1474 struct factors *factors)
1476 uintmax_t x, z, y, P, t, ni, g;
1478 unsigned long int k = 1;
1479 unsigned long int l = 1;
1481 redcify (P, 1, n);
1482 addmod (x, P, P, n); /* i.e., redcify(2) */
1483 y = z = x;
1485 while (n != 1)
1487 assert (a < n);
1489 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1491 for (;;)
1495 x = mulredc (x, x, n, ni);
1496 addmod (x, x, a, n);
1498 submod (t, z, x, n);
1499 P = mulredc (P, t, n, ni);
1501 if (k % 32 == 1)
1503 if (gcd_odd (P, n) != 1)
1504 goto factor_found;
1505 y = x;
1508 while (--k != 0);
1510 z = x;
1511 k = l;
1512 l = 2 * l;
1513 for (unsigned long int i = 0; i < k; i++)
1515 x = mulredc (x, x, n, ni);
1516 addmod (x, x, a, n);
1518 y = x;
1521 factor_found:
1524 y = mulredc (y, y, n, ni);
1525 addmod (y, y, a, n);
1527 submod (t, z, y, n);
1528 g = gcd_odd (t, n);
1530 while (g == 1);
1532 n = n / g;
1534 if (!prime_p (g))
1535 factor_using_pollard_rho (g, a + 1, factors);
1536 else
1537 factor_insert (factors, g);
1539 if (prime_p (n))
1541 factor_insert (factors, n);
1542 break;
1545 x = x % n;
1546 z = z % n;
1547 y = y % n;
1551 static void
1552 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1553 struct factors *factors)
1555 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1557 unsigned long int k = 1;
1558 unsigned long int l = 1;
1560 redcify2 (P1, P0, 1, n1, n0);
1561 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1562 y1 = z1 = x1;
1563 y0 = z0 = x0;
1565 while (n1 != 0 || n0 != 1)
1567 binv (ni, n0);
1569 for (;;)
1573 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1574 x1 = r1m;
1575 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1577 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1578 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1579 P1 = r1m;
1581 if (k % 32 == 1)
1583 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1584 if (g1 != 0 || g0 != 1)
1585 goto factor_found;
1586 y1 = x1; y0 = x0;
1589 while (--k != 0);
1591 z1 = x1; z0 = x0;
1592 k = l;
1593 l = 2 * l;
1594 for (unsigned long int i = 0; i < k; i++)
1596 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1597 x1 = r1m;
1598 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1600 y1 = x1; y0 = x0;
1603 factor_found:
1606 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1607 y1 = r1m;
1608 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1610 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1611 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1613 while (g1 == 0 && g0 == 1);
1615 if (g1 == 0)
1617 /* The found factor is one word. */
1618 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1620 if (!prime_p (g0))
1621 factor_using_pollard_rho (g0, a + 1, factors);
1622 else
1623 factor_insert (factors, g0);
1625 else
1627 /* The found factor is two words. This is highly unlikely, thus hard
1628 to trigger. Please be careful before you change this code! */
1629 uintmax_t ginv;
1631 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1632 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1633 n1 = 0; /* modulo B, ignoring the high divisor word. */
1635 if (!prime2_p (g1, g0))
1636 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1637 else
1638 factor_insert_large (factors, g1, g0);
1641 if (n1 == 0)
1643 if (prime_p (n0))
1645 factor_insert (factors, n0);
1646 break;
1649 factor_using_pollard_rho (n0, a, factors);
1650 return;
1653 if (prime2_p (n1, n0))
1655 factor_insert_large (factors, n1, n0);
1656 break;
1659 x0 = mod2 (&x1, x1, x0, n1, n0);
1660 z0 = mod2 (&z1, z1, z0, n1, n0);
1661 y0 = mod2 (&y1, y1, y0, n1, n0);
1665 #if HAVE_GMP
1666 static void
1667 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1668 struct mp_factors *factors)
1670 mpz_t x, z, y, P;
1671 mpz_t t, t2;
1673 devmsg ("[pollard-rho (%lu)] ", a);
1675 mpz_inits (t, t2, NULL);
1676 mpz_init_set_si (y, 2);
1677 mpz_init_set_si (x, 2);
1678 mpz_init_set_si (z, 2);
1679 mpz_init_set_ui (P, 1);
1681 unsigned long long int k = 1;
1682 unsigned long long int l = 1;
1684 while (mpz_cmp_ui (n, 1) != 0)
1686 for (;;)
1690 mpz_mul (t, x, x);
1691 mpz_mod (x, t, n);
1692 mpz_add_ui (x, x, a);
1694 mpz_sub (t, z, x);
1695 mpz_mul (t2, P, t);
1696 mpz_mod (P, t2, n);
1698 if (k % 32 == 1)
1700 mpz_gcd (t, P, n);
1701 if (mpz_cmp_ui (t, 1) != 0)
1702 goto factor_found;
1703 mpz_set (y, x);
1706 while (--k != 0);
1708 mpz_set (z, x);
1709 k = l;
1710 l = 2 * l;
1711 for (unsigned long long int i = 0; i < k; i++)
1713 mpz_mul (t, x, x);
1714 mpz_mod (x, t, n);
1715 mpz_add_ui (x, x, a);
1717 mpz_set (y, x);
1720 factor_found:
1723 mpz_mul (t, y, y);
1724 mpz_mod (y, t, n);
1725 mpz_add_ui (y, y, a);
1727 mpz_sub (t, z, y);
1728 mpz_gcd (t, t, n);
1730 while (mpz_cmp_ui (t, 1) == 0);
1732 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1734 if (!mp_prime_p (t))
1736 devmsg ("[composite factor--restarting pollard-rho] ");
1737 mp_factor_using_pollard_rho (t, a + 1, factors);
1739 else
1741 mp_factor_insert (factors, t);
1744 if (mp_prime_p (n))
1746 mp_factor_insert (factors, n);
1747 break;
1750 mpz_mod (x, x, n);
1751 mpz_mod (z, z, n);
1752 mpz_mod (y, y, n);
1755 mpz_clears (P, t2, t, z, x, y, NULL);
1757 #endif
1759 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1760 algorithm is replaced, consider also returning the remainder. */
1761 static uintmax_t _GL_ATTRIBUTE_CONST
1762 isqrt (uintmax_t n)
1764 uintmax_t x;
1765 unsigned c;
1766 if (n == 0)
1767 return 0;
1769 count_leading_zeros (c, n);
1771 /* Make x > sqrt(n). This will be invariant through the loop. */
1772 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1774 for (;;)
1776 uintmax_t y = (x + n/x) / 2;
1777 if (y >= x)
1778 return x;
1780 x = y;
1784 static uintmax_t _GL_ATTRIBUTE_CONST
1785 isqrt2 (uintmax_t nh, uintmax_t nl)
1787 unsigned int shift;
1788 uintmax_t x;
1790 /* Ensures the remainder fits in an uintmax_t. */
1791 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1793 if (nh == 0)
1794 return isqrt (nl);
1796 count_leading_zeros (shift, nh);
1797 shift &= ~1;
1799 /* Make x > sqrt(n) */
1800 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1801 x <<= (W_TYPE_SIZE - shift) / 2;
1803 /* Do we need more than one iteration? */
1804 for (;;)
1806 uintmax_t r _GL_UNUSED;
1807 uintmax_t q, y;
1808 udiv_qrnnd (q, r, nh, nl, x);
1809 y = (x + q) / 2;
1811 if (y >= x)
1813 uintmax_t hi, lo;
1814 umul_ppmm (hi, lo, x + 1, x + 1);
1815 assert (gt2 (hi, lo, nh, nl));
1817 umul_ppmm (hi, lo, x, x);
1818 assert (ge2 (nh, nl, hi, lo));
1819 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1820 assert (hi == 0);
1822 return x;
1825 x = y;
1829 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1830 #define MAGIC64 ((uint64_t) 0x0202021202030213ULL)
1831 #define MAGIC63 ((uint64_t) 0x0402483012450293ULL)
1832 #define MAGIC65 ((uint64_t) 0x218a019866014613ULL)
1833 #define MAGIC11 0x23b
1835 /* Return the square root if the input is a square, otherwise 0. */
1836 static uintmax_t _GL_ATTRIBUTE_CONST
1837 is_square (uintmax_t x)
1839 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1840 computing the square root. */
1841 if (((MAGIC64 >> (x & 63)) & 1)
1842 && ((MAGIC63 >> (x % 63)) & 1)
1843 /* Both 0 and 64 are squares mod (65) */
1844 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1845 && ((MAGIC11 >> (x % 11) & 1)))
1847 uintmax_t r = isqrt (x);
1848 if (r*r == x)
1849 return r;
1851 return 0;
1854 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1855 static const unsigned short invtab[0x81] =
1857 0x200,
1858 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1859 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1860 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1861 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1862 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1863 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1864 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1865 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1866 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1867 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1868 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1869 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1870 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1871 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1872 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1873 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1876 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1877 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1878 #define div_smallq(q, r, u, d) \
1879 do { \
1880 if ((u) / 0x40 < (d)) \
1882 int _cnt; \
1883 uintmax_t _dinv, _mask, _q, _r; \
1884 count_leading_zeros (_cnt, (d)); \
1885 _r = (u); \
1886 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1888 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1889 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1891 else \
1893 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1894 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1896 _r -= _q*(d); \
1898 _mask = -(uintmax_t) (_r >= (d)); \
1899 (r) = _r - (_mask & (d)); \
1900 (q) = _q - _mask; \
1901 assert ( (q) * (d) + (r) == u); \
1903 else \
1905 uintmax_t _q = (u) / (d); \
1906 (r) = (u) - _q * (d); \
1907 (q) = _q; \
1909 } while (0)
1911 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1912 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1913 with 3025 = 55^2.
1915 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1916 representing G_0 = (-55, 2*4652, 8653).
1918 In the notation of the paper:
1920 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1924 t_0 = floor([q_0 + R_0] / S0) = 1
1925 R_1 = t_0 * S_0 - R_0 = 4001
1926 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1929 /* Multipliers, in order of efficiency:
1930 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1931 0.7317 3*5*7 = 105 = 1
1932 0.7820 3*5*11 = 165 = 1
1933 0.7872 3*5 = 15 = 3
1934 0.8101 3*7*11 = 231 = 3
1935 0.8155 3*7 = 21 = 1
1936 0.8284 5*7*11 = 385 = 1
1937 0.8339 5*7 = 35 = 3
1938 0.8716 3*11 = 33 = 1
1939 0.8774 3 = 3 = 3
1940 0.8913 5*11 = 55 = 3
1941 0.8972 5 = 5 = 1
1942 0.9233 7*11 = 77 = 1
1943 0.9295 7 = 7 = 3
1944 0.9934 11 = 11 = 3
1946 #define QUEUE_SIZE 50
1948 #if STAT_SQUFOF
1949 # define Q_FREQ_SIZE 50
1950 /* Element 0 keeps the total */
1951 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1952 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1953 #endif
1955 /* Return true on success. Expected to fail only for numbers
1956 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1957 static bool
1958 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1960 /* Uses algorithm and notation from
1962 SQUARE FORM FACTORIZATION
1963 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1965 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1968 static const unsigned int multipliers_1[] =
1969 { /* = 1 (mod 4) */
1970 105, 165, 21, 385, 33, 5, 77, 1, 0
1972 static const unsigned int multipliers_3[] =
1973 { /* = 3 (mod 4) */
1974 1155, 15, 231, 35, 3, 55, 7, 11, 0
1977 const unsigned int *m;
1979 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1981 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1982 return false;
1984 uintmax_t sqrt_n = isqrt2 (n1, n0);
1986 if (n0 == sqrt_n * sqrt_n)
1988 uintmax_t p1, p0;
1990 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1991 assert (p0 == n0);
1993 if (n1 == p1)
1995 if (prime_p (sqrt_n))
1996 factor_insert_multiplicity (factors, sqrt_n, 2);
1997 else
1999 struct factors f;
2001 f.nfactors = 0;
2002 if (!factor_using_squfof (0, sqrt_n, &f))
2004 /* Try pollard rho instead */
2005 factor_using_pollard_rho (sqrt_n, 1, &f);
2007 /* Duplicate the new factors */
2008 for (unsigned int i = 0; i < f.nfactors; i++)
2009 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2011 return true;
2015 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2016 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2017 *m; m++)
2019 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2020 unsigned int i;
2021 unsigned int mu = *m;
2022 unsigned int qpos = 0;
2024 assert (mu * n0 % 4 == 3);
2026 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2027 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2028 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2029 mu^3 < n.
2031 However, this seems insufficient: With n = 37243139 and mu =
2032 105, we get a trivial factor, from the square 38809 = 197^2,
2033 without any corresponding Q earlier in the iteration.
2035 Requiring 64 mu^3 < n seems sufficient. */
2036 if (n1 == 0)
2038 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2039 continue;
2041 else
2043 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2044 continue;
2046 umul_ppmm (Dh, Dl, n0, mu);
2047 Dh += n1 * mu;
2049 assert (Dl % 4 != 1);
2050 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2052 S = isqrt2 (Dh, Dl);
2054 Q1 = 1;
2055 P = S;
2057 /* Square root remainder fits in one word, so ignore high part. */
2058 Q = Dl - P*P;
2059 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2060 L = isqrt (2*S);
2061 B = 2*L;
2062 L1 = mu * 2 * L;
2064 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2065 4 D. */
2067 for (i = 0; i <= B; i++)
2069 uintmax_t q, P1, t, rem;
2071 div_smallq (q, rem, S+P, Q);
2072 P1 = S - rem; /* P1 = q*Q - P */
2074 #if STAT_SQUFOF
2075 assert (q > 0);
2076 q_freq[0]++;
2077 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2078 #endif
2080 if (Q <= L1)
2082 uintmax_t g = Q;
2084 if ( (Q & 1) == 0)
2085 g /= 2;
2087 g /= gcd_odd (g, mu);
2089 if (g <= L)
2091 if (qpos >= QUEUE_SIZE)
2092 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2093 queue[qpos].Q = g;
2094 queue[qpos].P = P % g;
2095 qpos++;
2099 /* I think the difference can be either sign, but mod
2100 2^W_TYPE_SIZE arithmetic should be fine. */
2101 t = Q1 + q * (P - P1);
2102 Q1 = Q;
2103 Q = t;
2104 P = P1;
2106 if ( (i & 1) == 0)
2108 uintmax_t r = is_square (Q);
2109 if (r)
2111 for (unsigned int j = 0; j < qpos; j++)
2113 if (queue[j].Q == r)
2115 if (r == 1)
2116 /* Traversed entire cycle. */
2117 goto next_multiplier;
2119 /* Need the absolute value for divisibility test. */
2120 if (P >= queue[j].P)
2121 t = P - queue[j].P;
2122 else
2123 t = queue[j].P - P;
2124 if (t % r == 0)
2126 /* Delete entries up to and including entry
2127 j, which matched. */
2128 memmove (queue, queue + j + 1,
2129 (qpos - j - 1) * sizeof (queue[0]));
2130 qpos -= (j + 1);
2132 goto next_i;
2136 /* We have found a square form, which should give a
2137 factor. */
2138 Q1 = r;
2139 assert (S >= P); /* What signs are possible? */
2140 P += r * ((S - P) / r);
2142 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2143 for the case D = 2N. */
2144 /* Compute Q = (D - P*P) / Q1, but we need double
2145 precision. */
2146 uintmax_t hi, lo;
2147 umul_ppmm (hi, lo, P, P);
2148 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2149 udiv_qrnnd (Q, rem, hi, lo, Q1);
2150 assert (rem == 0);
2152 for (;;)
2154 /* Note: There appears to by a typo in the paper,
2155 Step 4a in the algorithm description says q <--
2156 floor([S+P]/\hat Q), but looking at the equations
2157 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2158 (In this code, \hat Q is Q1). */
2159 div_smallq (q, rem, S+P, Q);
2160 P1 = S - rem; /* P1 = q*Q - P */
2162 #if STAT_SQUFOF
2163 q_freq[0]++;
2164 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2165 #endif
2166 if (P == P1)
2167 break;
2168 t = Q1 + q * (P - P1);
2169 Q1 = Q;
2170 Q = t;
2171 P = P1;
2174 if ( (Q & 1) == 0)
2175 Q /= 2;
2176 Q /= gcd_odd (Q, mu);
2178 assert (Q > 1 && (n1 || Q < n0));
2180 if (prime_p (Q))
2181 factor_insert (factors, Q);
2182 else if (!factor_using_squfof (0, Q, factors))
2183 factor_using_pollard_rho (Q, 2, factors);
2185 divexact_21 (n1, n0, n1, n0, Q);
2187 if (prime2_p (n1, n0))
2188 factor_insert_large (factors, n1, n0);
2189 else
2191 if (!factor_using_squfof (n1, n0, factors))
2193 if (n1 == 0)
2194 factor_using_pollard_rho (n0, 1, factors);
2195 else
2196 factor_using_pollard_rho2 (n1, n0, 1, factors);
2200 return true;
2203 next_i:;
2205 next_multiplier:;
2207 return false;
2210 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2211 results in FACTORS. Use the algorithm selected by the global ALG. */
2212 static void
2213 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2215 factors->nfactors = 0;
2216 factors->plarge[1] = 0;
2218 if (t1 == 0 && t0 < 2)
2219 return;
2221 t0 = factor_using_division (&t1, t1, t0, factors);
2223 if (t1 == 0 && t0 < 2)
2224 return;
2226 if (prime2_p (t1, t0))
2227 factor_insert_large (factors, t1, t0);
2228 else
2230 if (alg == ALG_SQUFOF)
2231 if (factor_using_squfof (t1, t0, factors))
2232 return;
2234 if (t1 == 0)
2235 factor_using_pollard_rho (t0, 1, factors);
2236 else
2237 factor_using_pollard_rho2 (t1, t0, 1, factors);
2241 #if HAVE_GMP
2242 /* Use Pollard-rho to compute the prime factors of
2243 arbitrary-precision T, and put the results in FACTORS. */
2244 static void
2245 mp_factor (mpz_t t, struct mp_factors *factors)
2247 mp_factor_init (factors);
2249 if (mpz_sgn (t) != 0)
2251 mp_factor_using_division (t, factors);
2253 if (mpz_cmp_ui (t, 1) != 0)
2255 devmsg ("[is number prime?] ");
2256 if (mp_prime_p (t))
2257 mp_factor_insert (factors, t);
2258 else
2259 mp_factor_using_pollard_rho (t, 1, factors);
2263 #endif
2265 static strtol_error
2266 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2268 unsigned int lo_carry;
2269 uintmax_t hi = 0, lo = 0;
2271 strtol_error err = LONGINT_INVALID;
2273 /* Skip initial spaces and '+'. */
2274 for (;;)
2276 char c = *s;
2277 if (c == ' ')
2278 s++;
2279 else if (c == '+')
2281 s++;
2282 break;
2284 else
2285 break;
2288 /* Initial scan for invalid digits. */
2289 const char *p = s;
2290 for (;;)
2292 unsigned int c = *p++;
2293 if (c == 0)
2294 break;
2296 if (UNLIKELY (!ISDIGIT (c)))
2298 err = LONGINT_INVALID;
2299 break;
2302 err = LONGINT_OK; /* we've seen at least one valid digit */
2305 for (;err == LONGINT_OK;)
2307 unsigned int c = *s++;
2308 if (c == 0)
2309 break;
2311 c -= '0';
2313 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2315 err = LONGINT_OVERFLOW;
2316 break;
2318 hi = 10 * hi;
2320 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2321 lo_carry += 10 * lo < 2 * lo;
2323 lo = 10 * lo;
2324 lo += c;
2326 lo_carry += lo < c;
2327 hi += lo_carry;
2328 if (UNLIKELY (hi < lo_carry))
2330 err = LONGINT_OVERFLOW;
2331 break;
2335 *hip = hi;
2336 *lop = lo;
2338 return err;
2341 static void
2342 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2344 uintmax_t q, r;
2346 if (t1 == 0)
2347 printf ("%"PRIuMAX, t0);
2348 else
2350 /* Use very plain code here since it seems hard to write fast code
2351 without assuming a specific word size. */
2352 q = t1 / 1000000000;
2353 r = t1 % 1000000000;
2354 udiv_qrnnd (t0, r, r, t0, 1000000000);
2355 print_uintmaxes (q, t0);
2356 printf ("%09u", (int) r);
2360 /* Single-precision factoring */
2361 static void
2362 print_factors_single (uintmax_t t1, uintmax_t t0)
2364 struct factors factors;
2366 print_uintmaxes (t1, t0);
2367 putchar (':');
2369 factor (t1, t0, &factors);
2371 for (unsigned int j = 0; j < factors.nfactors; j++)
2372 for (unsigned int k = 0; k < factors.e[j]; k++)
2374 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2375 putchar (' ');
2376 fputs (umaxtostr (factors.p[j], buf), stdout);
2379 if (factors.plarge[1])
2381 putchar (' ');
2382 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2384 putchar ('\n');
2387 /* Emit the factors of the indicated number. If we have the option of using
2388 either algorithm, we select on the basis of the length of the number.
2389 For longer numbers, we prefer the MP algorithm even if the native algorithm
2390 has enough digits, because the algorithm is better. The turnover point
2391 depends on the value. */
2392 static bool
2393 print_factors (const char *input)
2395 uintmax_t t1, t0;
2397 /* Try converting the number to one or two words. If it fails, use GMP or
2398 print an error message. The 2nd condition checks that the most
2399 significant bit of the two-word number is clear, in a typesize neutral
2400 way. */
2401 strtol_error err = strto2uintmax (&t1, &t0, input);
2403 switch (err)
2405 case LONGINT_OK:
2406 if (((t1 << 1) >> 1) == t1)
2408 devmsg ("[using single-precision arithmetic] ");
2409 print_factors_single (t1, t0);
2410 return true;
2412 break;
2414 case LONGINT_OVERFLOW:
2415 /* Try GMP. */
2416 break;
2418 default:
2419 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2420 return false;
2423 #if HAVE_GMP
2424 devmsg ("[using arbitrary-precision arithmetic] ");
2425 mpz_t t;
2426 struct mp_factors factors;
2428 mpz_init_set_str (t, input, 10);
2430 gmp_printf ("%Zd:", t);
2431 mp_factor (t, &factors);
2433 for (unsigned int j = 0; j < factors.nfactors; j++)
2434 for (unsigned int k = 0; k < factors.e[j]; k++)
2435 gmp_printf (" %Zd", factors.p[j]);
2437 mp_factor_clear (&factors);
2438 mpz_clear (t);
2439 putchar ('\n');
2440 return true;
2441 #else
2442 error (0, 0, _("%s is too large"), quote (input));
2443 return false;
2444 #endif
2447 void
2448 usage (int status)
2450 if (status != EXIT_SUCCESS)
2451 emit_try_help ();
2452 else
2454 printf (_("\
2455 Usage: %s [NUMBER]...\n\
2456 or: %s OPTION\n\
2458 program_name, program_name);
2459 fputs (_("\
2460 Print the prime factors of each specified integer NUMBER. If none\n\
2461 are specified on the command line, read them from standard input.\n\
2463 "), stdout);
2464 fputs (HELP_OPTION_DESCRIPTION, stdout);
2465 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2466 emit_ancillary_info ();
2468 exit (status);
2471 static bool
2472 do_stdin (void)
2474 bool ok = true;
2475 token_buffer tokenbuffer;
2477 init_tokenbuffer (&tokenbuffer);
2479 while (true)
2481 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2482 &tokenbuffer);
2483 if (token_length == (size_t) -1)
2484 break;
2485 ok &= print_factors (tokenbuffer.buffer);
2487 free (tokenbuffer.buffer);
2489 return ok;
2493 main (int argc, char **argv)
2495 initialize_main (&argc, &argv);
2496 set_program_name (argv[0]);
2497 setlocale (LC_ALL, "");
2498 bindtextdomain (PACKAGE, LOCALEDIR);
2499 textdomain (PACKAGE);
2501 atexit (close_stdout);
2503 alg = ALG_POLLARD_RHO; /* Default to Pollard rho */
2505 int c;
2506 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2508 switch (c)
2510 case DEV_DEBUG_OPTION:
2511 dev_debug = true;
2512 break;
2514 case 's':
2515 alg = ALG_SQUFOF;
2516 break;
2518 case 'w':
2519 flag_prove_primality = false;
2520 break;
2522 case_GETOPT_HELP_CHAR;
2524 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2526 default:
2527 usage (EXIT_FAILURE);
2531 #if STAT_SQUFOF
2532 if (alg == ALG_SQUFOF)
2533 memset (q_freq, 0, sizeof (q_freq));
2534 #endif
2536 bool ok;
2537 if (argc <= optind)
2538 ok = do_stdin ();
2539 else
2541 ok = true;
2542 for (int i = optind; i < argc; i++)
2543 if (! print_factors (argv[i]))
2544 ok = false;
2547 #if STAT_SQUFOF
2548 if (alg == ALG_SQUFOF && q_freq[0] > 0)
2550 double acc_f;
2551 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2552 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2554 double f = (double) q_freq[i] / q_freq[0];
2555 acc_f += f;
2556 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2557 100.0 * f, 100.0 * acc_f);
2560 #endif
2562 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);