factor: apply a more general fix to enable correct assembly
[coreutils.git] / src / factor.c
blob95451a58d2a9add8c4cfa4a9ffa835484e4219bb
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synched 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or the SQUFOF algorithm, checking status of found factors
54 again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 #include <config.h>
88 #include <getopt.h>
89 #include <stdio.h>
90 #if HAVE_GMP
91 # include <gmp.h>
92 # if !HAVE_DECL_MPZ_INITS
93 # include <stdarg.h>
94 # endif
95 #endif
97 #include <assert.h>
99 #include "system.h"
100 #include "error.h"
101 #include "quote.h"
102 #include "readtokens.h"
103 #include "xstrtol.h"
105 /* The official name of this program (e.g., no 'g' prefix). */
106 #define PROGRAM_NAME "factor"
108 #define AUTHORS \
109 proper_name ("Paul Rubin"), \
110 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
111 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
113 /* Token delimiters when reading from a file. */
114 #define DELIM "\n\t "
116 #ifndef STAT_SQUFOF
117 # define STAT_SQUFOF 0
118 #endif
120 #ifndef USE_LONGLONG_H
121 /* With the way we use longlong.h, it's only safe to use
122 when UWtype = UHWtype, as there were various cases
123 (as can be seen in the history for longlong.h) where
124 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
125 to avoid compile time or run time issues. */
126 # if LONG_MAX == INTMAX_MAX
127 # define USE_LONGLONG_H 1
128 # endif
129 #endif
131 #if USE_LONGLONG_H
133 /* Make definitions for longlong.h to make it do what it can do for us */
135 /* bitcount for uintmax_t */
136 # if UINTMAX_MAX == UINT32_MAX
137 # define W_TYPE_SIZE 32
138 # elif UINTMAX_MAX == UINT64_MAX
139 # define W_TYPE_SIZE 64
140 # elif UINTMAX_MAX == UINT128_MAX
141 # define W_TYPE_SIZE 128
142 # endif
144 # define UWtype uintmax_t
145 # define UHWtype unsigned long int
146 # undef UDWtype
147 # if HAVE_ATTRIBUTE_MODE
148 typedef unsigned int UQItype __attribute__ ((mode (QI)));
149 typedef int SItype __attribute__ ((mode (SI)));
150 typedef unsigned int USItype __attribute__ ((mode (SI)));
151 typedef int DItype __attribute__ ((mode (DI)));
152 typedef unsigned int UDItype __attribute__ ((mode (DI)));
153 # else
154 typedef unsigned char UQItype;
155 typedef long SItype;
156 typedef unsigned long int USItype;
157 # if HAVE_LONG_LONG_INT
158 typedef long long int DItype;
159 typedef unsigned long long int UDItype;
160 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
161 typedef long int DItype;
162 typedef unsigned long int UDItype;
163 # endif
164 # endif
165 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
166 # define ASSERT(x) /* FIXME make longlong.h really standalone */
167 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
168 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
169 # ifndef __GMP_GNUC_PREREQ
170 # define __GMP_GNUC_PREREQ(a,b) 1
171 # endif
173 /* These stub macros are only used in longlong.h in certain system compiler
174 combinations, so ensure usage to avoid -Wunused-macros warnings. */
175 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
176 ASSERT (1)
177 __GMP_DECLSPEC
178 # endif
180 # if _ARCH_PPC
181 # define HAVE_HOST_CPU_FAMILY_powerpc 1
182 # endif
183 # include "longlong.h"
184 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
185 const unsigned char factor_clz_tab[129] =
187 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
188 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
189 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
190 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
193 # endif
195 #else /* not USE_LONGLONG_H */
197 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
198 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
199 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
200 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
202 #endif
204 #if !defined __clz_tab && !defined UHWtype
205 /* Without this seemingly useless conditional, gcc -Wunused-macros
206 warns that each of the two tested macros is unused on Fedora 18.
207 FIXME: this is just an ugly band-aid. Fix it properly. */
208 #endif
210 enum alg_type { ALG_POLLARD_RHO = 1, ALG_SQUFOF = 2 };
212 static enum alg_type alg;
214 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
215 #define MAX_NFACTS 26
217 enum
219 VERBOSE_OPTION = CHAR_MAX + 1
222 static struct option const long_options[] =
224 {"verbose", no_argument, NULL, VERBOSE_OPTION},
225 {GETOPT_HELP_OPTION_DECL},
226 {GETOPT_VERSION_OPTION_DECL},
227 {NULL, 0, NULL, 0}
230 struct factors
232 uintmax_t plarge[2]; /* Can have a single large factor */
233 uintmax_t p[MAX_NFACTS];
234 unsigned char e[MAX_NFACTS];
235 unsigned char nfactors;
238 #if HAVE_GMP
239 struct mp_factors
241 mpz_t *p;
242 unsigned long int *e;
243 unsigned long int nfactors;
245 #endif
247 static void factor (uintmax_t, uintmax_t, struct factors *);
249 #ifndef umul_ppmm
250 # define umul_ppmm(w1, w0, u, v) \
251 do { \
252 uintmax_t __x0, __x1, __x2, __x3; \
253 unsigned long int __ul, __vl, __uh, __vh; \
254 uintmax_t __u = (u), __v = (v); \
256 __ul = __ll_lowpart (__u); \
257 __uh = __ll_highpart (__u); \
258 __vl = __ll_lowpart (__v); \
259 __vh = __ll_highpart (__v); \
261 __x0 = (uintmax_t) __ul * __vl; \
262 __x1 = (uintmax_t) __ul * __vh; \
263 __x2 = (uintmax_t) __uh * __vl; \
264 __x3 = (uintmax_t) __uh * __vh; \
266 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
267 __x1 += __x2; /* but this indeed can */ \
268 if (__x1 < __x2) /* did we get it? */ \
269 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
271 (w1) = __x3 + __ll_highpart (__x1); \
272 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
273 } while (0)
274 #endif
276 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
277 /* Define our own, not needing normalization. This function is
278 currently not performance critical, so keep it simple. Similar to
279 the mod macro below. */
280 # undef udiv_qrnnd
281 # define udiv_qrnnd(q, r, n1, n0, d) \
282 do { \
283 uintmax_t __d1, __d0, __q, __r1, __r0; \
285 assert ((n1) < (d)); \
286 __d1 = (d); __d0 = 0; \
287 __r1 = (n1); __r0 = (n0); \
288 __q = 0; \
289 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
291 rsh2 (__d1, __d0, __d1, __d0, 1); \
292 __q <<= 1; \
293 if (ge2 (__r1, __r0, __d1, __d0)) \
295 __q++; \
296 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
299 (r) = __r0; \
300 (q) = __q; \
301 } while (0)
302 #endif
304 #if !defined add_ssaaaa
305 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
306 do { \
307 uintmax_t _add_x; \
308 _add_x = (al) + (bl); \
309 (sh) = (ah) + (bh) + (_add_x < (al)); \
310 (sl) = _add_x; \
311 } while (0)
312 #endif
314 #define rsh2(rh, rl, ah, al, cnt) \
315 do { \
316 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
317 (rh) = (ah) >> (cnt); \
318 } while (0)
320 #define lsh2(rh, rl, ah, al, cnt) \
321 do { \
322 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
323 (rl) = (al) << (cnt); \
324 } while (0)
326 #define ge2(ah, al, bh, bl) \
327 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
329 #define gt2(ah, al, bh, bl) \
330 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
332 #ifndef sub_ddmmss
333 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
334 do { \
335 uintmax_t _cy; \
336 _cy = (al) < (bl); \
337 (rl) = (al) - (bl); \
338 (rh) = (ah) - (bh) - _cy; \
339 } while (0)
340 #endif
342 #ifndef count_leading_zeros
343 # define count_leading_zeros(count, x) do { \
344 uintmax_t __clz_x = (x); \
345 unsigned int __clz_c; \
346 for (__clz_c = 0; \
347 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
348 __clz_c += 8) \
349 __clz_x <<= 8; \
350 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
351 __clz_x <<= 1; \
352 (count) = __clz_c; \
353 } while (0)
354 #endif
356 #ifndef count_trailing_zeros
357 # define count_trailing_zeros(count, x) do { \
358 uintmax_t __ctz_x = (x); \
359 unsigned int __ctz_c = 0; \
360 while ((__ctz_x & 1) == 0) \
362 __ctz_x >>= 1; \
363 __ctz_c++; \
365 (count) = __ctz_c; \
366 } while (0)
367 #endif
369 /* Requires that a < n and b <= n */
370 #define submod(r,a,b,n) \
371 do { \
372 uintmax_t _t = - (uintmax_t) (a < b); \
373 (r) = ((n) & _t) + (a) - (b); \
374 } while (0)
376 #define addmod(r,a,b,n) \
377 submod ((r), (a), ((n) - (b)), (n))
379 /* Modular two-word addition and subtraction. For performance reasons, the
380 most significant bit of n1 must be clear. The destination variables must be
381 distinct from the mod operand. */
382 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
383 do { \
384 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
385 if (ge2 ((r1), (r0), (n1), (n0))) \
386 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
387 } while (0)
388 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
389 do { \
390 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
391 if ((intmax_t) (r1) < 0) \
392 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
393 } while (0)
395 #define HIGHBIT_TO_MASK(x) \
396 (((intmax_t)-1 >> 1) < 0 \
397 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
398 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
399 ? UINTMAX_MAX : (uintmax_t) 0))
401 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
402 Requires that d1 != 0. */
403 static uintmax_t
404 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
406 int cntd, cnta;
408 assert (d1 != 0);
410 if (a1 == 0)
412 *r1 = 0;
413 return a0;
416 count_leading_zeros (cntd, d1);
417 count_leading_zeros (cnta, a1);
418 int cnt = cntd - cnta;
419 lsh2 (d1, d0, d1, d0, cnt);
420 for (int i = 0; i < cnt; i++)
422 if (ge2 (a1, a0, d1, d0))
423 sub_ddmmss (a1, a0, a1, a0, d1, d0);
424 rsh2 (d1, d0, d1, d0, 1);
427 *r1 = a1;
428 return a0;
431 static uintmax_t _GL_ATTRIBUTE_CONST
432 gcd_odd (uintmax_t a, uintmax_t b)
434 if ( (b & 1) == 0)
436 uintmax_t t = b;
437 b = a;
438 a = t;
440 if (a == 0)
441 return b;
443 /* Take out least significant one bit, to make room for sign */
444 b >>= 1;
446 for (;;)
448 uintmax_t t;
449 uintmax_t bgta;
451 while ((a & 1) == 0)
452 a >>= 1;
453 a >>= 1;
455 t = a - b;
456 if (t == 0)
457 return (a << 1) + 1;
459 bgta = HIGHBIT_TO_MASK (t);
461 /* b <-- min (a, b) */
462 b += (bgta & t);
464 /* a <-- |a - b| */
465 a = (t ^ bgta) - bgta;
469 static uintmax_t
470 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
472 while ((a0 & 1) == 0)
473 rsh2 (a1, a0, a1, a0, 1);
474 while ((b0 & 1) == 0)
475 rsh2 (b1, b0, b1, b0, 1);
477 for (;;)
479 if ((b1 | a1) == 0)
481 *r1 = 0;
482 return gcd_odd (b0, a0);
485 if (gt2 (a1, a0, b1, b0))
487 sub_ddmmss (a1, a0, a1, a0, b1, b0);
489 rsh2 (a1, a0, a1, a0, 1);
490 while ((a0 & 1) == 0);
492 else if (gt2 (b1, b0, a1, a0))
494 sub_ddmmss (b1, b0, b1, b0, a1, a0);
496 rsh2 (b1, b0, b1, b0, 1);
497 while ((b0 & 1) == 0);
499 else
500 break;
503 *r1 = a1;
504 return a0;
507 static void
508 factor_insert_multiplicity (struct factors *factors,
509 uintmax_t prime, unsigned int m)
511 unsigned int nfactors = factors->nfactors;
512 uintmax_t *p = factors->p;
513 unsigned char *e = factors->e;
515 /* Locate position for insert new or increment e. */
516 int i;
517 for (i = nfactors - 1; i >= 0; i--)
519 if (p[i] <= prime)
520 break;
523 if (i < 0 || p[i] != prime)
525 for (int j = nfactors - 1; j > i; j--)
527 p[j + 1] = p[j];
528 e[j + 1] = e[j];
530 p[i + 1] = prime;
531 e[i + 1] = m;
532 factors->nfactors = nfactors + 1;
534 else
536 e[i] += m;
540 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
542 static void
543 factor_insert_large (struct factors *factors,
544 uintmax_t p1, uintmax_t p0)
546 if (p1 > 0)
548 assert (factors->plarge[1] == 0);
549 factors->plarge[0] = p0;
550 factors->plarge[1] = p1;
552 else
553 factor_insert (factors, p0);
556 #if HAVE_GMP
558 # if !HAVE_DECL_MPZ_INITS
560 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
561 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
563 static void
564 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
566 va_list ap;
568 va_start (ap, mpz_single_init);
570 mpz_t *mpz;
571 while ((mpz = va_arg (ap, mpz_t *)))
572 mpz_single_init (*mpz);
574 va_end (ap);
576 # endif
578 static void mp_factor (mpz_t, struct mp_factors *);
580 static void
581 mp_factor_init (struct mp_factors *factors)
583 factors->p = NULL;
584 factors->e = NULL;
585 factors->nfactors = 0;
588 static void
589 mp_factor_clear (struct mp_factors *factors)
591 for (unsigned int i = 0; i < factors->nfactors; i++)
592 mpz_clear (factors->p[i]);
594 free (factors->p);
595 free (factors->e);
598 static void
599 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
601 unsigned long int nfactors = factors->nfactors;
602 mpz_t *p = factors->p;
603 unsigned long int *e = factors->e;
604 long i;
606 /* Locate position for insert new or increment e. */
607 for (i = nfactors - 1; i >= 0; i--)
609 if (mpz_cmp (p[i], prime) <= 0)
610 break;
613 if (i < 0 || mpz_cmp (p[i], prime) != 0)
615 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
616 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
618 mpz_init (p[nfactors]);
619 for (long j = nfactors - 1; j > i; j--)
621 mpz_set (p[j + 1], p[j]);
622 e[j + 1] = e[j];
624 mpz_set (p[i + 1], prime);
625 e[i + 1] = 1;
627 factors->p = p;
628 factors->e = e;
629 factors->nfactors = nfactors + 1;
631 else
633 e[i] += 1;
637 static void
638 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
640 mpz_t pz;
642 mpz_init_set_ui (pz, prime);
643 mp_factor_insert (factors, pz);
644 mpz_clear (pz);
646 #endif /* HAVE_GMP */
649 /* Number of bits in an uintmax_t. */
650 enum { W = sizeof (uintmax_t) * CHAR_BIT };
652 /* Verify that uintmax_t does not have holes in its representation. */
653 verify (UINTMAX_MAX >> (W - 1) == 1);
655 #define P(a,b,c,d) a,
656 static const unsigned char primes_diff[] = {
657 #include "primes.h"
658 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
660 #undef P
662 #define PRIMES_PTAB_ENTRIES \
663 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
665 #define P(a,b,c,d) b,
666 static const unsigned char primes_diff8[] = {
667 #include "primes.h"
668 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
670 #undef P
672 struct primes_dtab
674 uintmax_t binv, lim;
677 #define P(a,b,c,d) {c,d},
678 static const struct primes_dtab primes_dtab[] = {
679 #include "primes.h"
680 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
682 #undef P
684 /* Verify that uintmax_t is not wider than
685 the integers used to generate primes.h. */
686 verify (W <= WIDE_UINT_BITS);
688 /* This flag is honored only in the GMP code. */
689 static int verbose = 0;
691 /* Prove primality or run probabilistic tests. */
692 static bool flag_prove_primality = true;
694 /* Number of Miller-Rabin tests to run when not proving primality. */
695 #define MR_REPS 25
697 #ifdef __GNUC__
698 # define LIKELY(cond) __builtin_expect ((cond), 1)
699 # define UNLIKELY(cond) __builtin_expect ((cond), 0)
700 #else
701 # define LIKELY(cond) (cond)
702 # define UNLIKELY(cond) (cond)
703 #endif
705 static void
706 debug (char const *fmt, ...)
708 if (verbose)
710 va_list ap;
711 va_start (ap, fmt);
712 vfprintf (stderr, fmt, ap);
713 va_end (ap);
717 static void
718 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
719 unsigned int off)
721 for (unsigned int j = 0; j < off; j++)
722 p += primes_diff[i + j];
723 factor_insert (factors, p);
726 /* Trial division with odd primes uses the following trick.
728 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
729 consider the case t < B (this is the second loop below).
731 From our tables we get
733 binv = p^{-1} (mod B)
734 lim = floor ( (B-1) / p ).
736 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
737 (and all quotients in this range occur for some t).
739 Then t = q * p is true also (mod B), and p is invertible we get
741 q = t * binv (mod B).
743 Next, assume that t is *not* divisible by p. Since multiplication
744 by binv (mod B) is a one-to-one mapping,
746 t * binv (mod B) > lim,
748 because all the smaller values are already taken.
750 This can be summed up by saying that the function
752 q(t) = binv * t (mod B)
754 is a permutation of the range 0 <= t < B, with the curious property
755 that it maps the multiples of p onto the range 0 <= q <= lim, in
756 order, and the non-multiples of p onto the range lim < q < B.
759 static uintmax_t
760 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
761 struct factors *factors)
763 if (t0 % 2 == 0)
765 unsigned int cnt;
767 if (t0 == 0)
769 count_trailing_zeros (cnt, t1);
770 t0 = t1 >> cnt;
771 t1 = 0;
772 cnt += W_TYPE_SIZE;
774 else
776 count_trailing_zeros (cnt, t0);
777 rsh2 (t1, t0, t1, t0, cnt);
780 factor_insert_multiplicity (factors, 2, cnt);
783 uintmax_t p = 3;
784 unsigned int i;
785 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
787 for (;;)
789 uintmax_t q1, q0, hi, lo ATTRIBUTE_UNUSED;
791 q0 = t0 * primes_dtab[i].binv;
792 umul_ppmm (hi, lo, q0, p);
793 if (hi > t1)
794 break;
795 hi = t1 - hi;
796 q1 = hi * primes_dtab[i].binv;
797 if (LIKELY (q1 > primes_dtab[i].lim))
798 break;
799 t1 = q1; t0 = q0;
800 factor_insert (factors, p);
802 p += primes_diff[i + 1];
804 if (t1p)
805 *t1p = t1;
807 #define DIVBLOCK(I) \
808 do { \
809 for (;;) \
811 q = t0 * pd[I].binv; \
812 if (LIKELY (q > pd[I].lim)) \
813 break; \
814 t0 = q; \
815 factor_insert_refind (factors, p, i + 1, I); \
817 } while (0)
819 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
821 uintmax_t q;
822 const struct primes_dtab *pd = &primes_dtab[i];
823 DIVBLOCK (0);
824 DIVBLOCK (1);
825 DIVBLOCK (2);
826 DIVBLOCK (3);
827 DIVBLOCK (4);
828 DIVBLOCK (5);
829 DIVBLOCK (6);
830 DIVBLOCK (7);
832 p += primes_diff8[i];
833 if (p * p > t0)
834 break;
837 return t0;
840 #if HAVE_GMP
841 static void
842 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
844 mpz_t q;
845 unsigned long int p;
847 debug ("[trial division] ");
849 mpz_init (q);
851 p = mpz_scan1 (t, 0);
852 mpz_div_2exp (t, t, p);
853 while (p)
855 mp_factor_insert_ui (factors, 2);
856 --p;
859 p = 3;
860 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
862 if (! mpz_divisible_ui_p (t, p))
864 p += primes_diff[i++];
865 if (mpz_cmp_ui (t, p * p) < 0)
866 break;
868 else
870 mpz_tdiv_q_ui (t, t, p);
871 mp_factor_insert_ui (factors, p);
875 mpz_clear (q);
877 #endif
879 /* Entry i contains (2i+1)^(-1) mod 2^8. */
880 static const unsigned char binvert_table[128] =
882 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
883 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
884 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
885 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
886 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
887 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
888 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
889 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
890 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
891 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
892 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
893 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
894 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
895 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
896 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
897 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
900 /* Compute n^(-1) mod B, using a Newton iteration. */
901 #define binv(inv,n) \
902 do { \
903 uintmax_t __n = (n); \
904 uintmax_t __inv; \
906 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
907 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
908 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
909 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
911 if (W_TYPE_SIZE > 64) \
913 int __invbits = 64; \
914 do { \
915 __inv = 2 * __inv - __inv * __inv * __n; \
916 __invbits *= 2; \
917 } while (__invbits < W_TYPE_SIZE); \
920 (inv) = __inv; \
921 } while (0)
923 /* q = u / d, assuming d|u. */
924 #define divexact_21(q1, q0, u1, u0, d) \
925 do { \
926 uintmax_t _di, _q0; \
927 binv (_di, (d)); \
928 _q0 = (u0) * _di; \
929 if ((u1) >= (d)) \
931 uintmax_t _p1, _p0 ATTRIBUTE_UNUSED; \
932 umul_ppmm (_p1, _p0, _q0, d); \
933 (q1) = ((u1) - _p1) * _di; \
934 (q0) = _q0; \
936 else \
938 (q0) = _q0; \
939 (q1) = 0; \
941 } while (0)
943 /* x B (mod n). */
944 #define redcify(r_prim, r, n) \
945 do { \
946 uintmax_t _redcify_q ATTRIBUTE_UNUSED; \
947 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
948 } while (0)
950 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
951 #define redcify2(r1, r0, x, n1, n0) \
952 do { \
953 uintmax_t _r1, _r0, _i; \
954 if ((x) < (n1)) \
956 _r1 = (x); _r0 = 0; \
957 _i = W_TYPE_SIZE; \
959 else \
961 _r1 = 0; _r0 = (x); \
962 _i = 2*W_TYPE_SIZE; \
964 while (_i-- > 0) \
966 lsh2 (_r1, _r0, _r1, _r0, 1); \
967 if (ge2 (_r1, _r0, (n1), (n0))) \
968 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
970 (r1) = _r1; \
971 (r0) = _r0; \
972 } while (0)
974 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
975 Both a and b must be in redc form, the result will be in redc form too. */
976 static inline uintmax_t
977 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
979 uintmax_t rh, rl, q, th, tl ATTRIBUTE_UNUSED, xh;
981 umul_ppmm (rh, rl, a, b);
982 q = rl * mi;
983 umul_ppmm (th, tl, q, m);
984 xh = rh - th;
985 if (rh < th)
986 xh += m;
988 return xh;
991 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
992 Both a and b must be in redc form, the result will be in redc form too.
993 For performance reasons, the most significant bit of m must be clear. */
994 static uintmax_t
995 mulredc2 (uintmax_t *r1p,
996 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
997 uintmax_t m1, uintmax_t m0, uintmax_t mi)
999 uintmax_t r1, r0, q, p1, p0 ATTRIBUTE_UNUSED, t1, t0, s1, s0;
1000 mi = -mi;
1001 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
1002 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
1003 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
1005 /* First compute a0 * <b1, b0> B^{-1}
1006 +-----+
1007 |a0 b0|
1008 +--+--+--+
1009 |a0 b1|
1010 +--+--+--+
1011 |q0 m0|
1012 +--+--+--+
1013 |q0 m1|
1014 -+--+--+--+
1015 |r1|r0| 0|
1016 +--+--+--+
1018 umul_ppmm (t1, t0, a0, b0);
1019 umul_ppmm (r1, r0, a0, b1);
1020 q = mi * t0;
1021 umul_ppmm (p1, p0, q, m0);
1022 umul_ppmm (s1, s0, q, m1);
1023 r0 += (t0 != 0); /* Carry */
1024 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1025 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1026 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1028 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1029 +-----+
1030 |a1 b0|
1031 +--+--+
1032 |r1|r0|
1033 +--+--+--+
1034 |a1 b1|
1035 +--+--+--+
1036 |q1 m0|
1037 +--+--+--+
1038 |q1 m1|
1039 -+--+--+--+
1040 |r1|r0| 0|
1041 +--+--+--+
1043 umul_ppmm (t1, t0, a1, b0);
1044 umul_ppmm (s1, s0, a1, b1);
1045 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1046 q = mi * t0;
1047 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1048 umul_ppmm (p1, p0, q, m0);
1049 umul_ppmm (s1, s0, q, m1);
1050 r0 += (t0 != 0); /* Carry */
1051 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1052 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1053 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1055 if (ge2 (r1, r0, m1, m0))
1056 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1058 *r1p = r1;
1059 return r0;
1062 static uintmax_t _GL_ATTRIBUTE_CONST
1063 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1065 uintmax_t y = one;
1067 if (e & 1)
1068 y = b;
1070 while (e != 0)
1072 b = mulredc (b, b, n, ni);
1073 e >>= 1;
1075 if (e & 1)
1076 y = mulredc (y, b, n, ni);
1079 return y;
1082 static uintmax_t
1083 powm2 (uintmax_t *r1m,
1084 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1085 uintmax_t ni, const uintmax_t *one)
1087 uintmax_t r1, r0, b1, b0, n1, n0;
1088 unsigned int i;
1089 uintmax_t e;
1091 b0 = bp[0];
1092 b1 = bp[1];
1093 n0 = np[0];
1094 n1 = np[1];
1096 r0 = one[0];
1097 r1 = one[1];
1099 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1101 if (e & 1)
1103 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1104 r1 = *r1m;
1106 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1107 b1 = *r1m;
1109 for (e = ep[1]; e > 0; e >>= 1)
1111 if (e & 1)
1113 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1114 r1 = *r1m;
1116 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1117 b1 = *r1m;
1119 *r1m = r1;
1120 return r0;
1123 static bool _GL_ATTRIBUTE_CONST
1124 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1125 unsigned int k, uintmax_t one)
1127 uintmax_t y = powm (b, q, n, ni, one);
1129 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1131 if (y == one || y == nm1)
1132 return true;
1134 for (unsigned int i = 1; i < k; i++)
1136 y = mulredc (y, y, n, ni);
1138 if (y == nm1)
1139 return true;
1140 if (y == one)
1141 return false;
1143 return false;
1146 static bool
1147 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1148 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1150 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1152 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1153 y1 = r1m;
1155 if (y0 == one[0] && y1 == one[1])
1156 return true;
1158 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1160 if (y0 == nm1_0 && y1 == nm1_1)
1161 return true;
1163 for (unsigned int i = 1; i < k; i++)
1165 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1166 y1 = r1m;
1168 if (y0 == nm1_0 && y1 == nm1_1)
1169 return true;
1170 if (y0 == one[0] && y1 == one[1])
1171 return false;
1173 return false;
1176 #if HAVE_GMP
1177 static bool
1178 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1179 mpz_srcptr q, unsigned long int k)
1181 mpz_powm (y, x, q, n);
1183 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1184 return true;
1186 for (unsigned long int i = 1; i < k; i++)
1188 mpz_powm_ui (y, y, 2, n);
1189 if (mpz_cmp (y, nm1) == 0)
1190 return true;
1191 if (mpz_cmp_ui (y, 1) == 0)
1192 return false;
1194 return false;
1196 #endif
1198 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1199 have been observed. The average seem to be about 2. */
1200 static bool
1201 prime_p (uintmax_t n)
1203 int k;
1204 bool is_prime;
1205 uintmax_t a_prim, one, ni;
1206 struct factors factors;
1208 if (n <= 1)
1209 return false;
1211 /* We have already casted out small primes. */
1212 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1213 return true;
1215 /* Precomputation for Miller-Rabin. */
1216 uintmax_t q = n - 1;
1217 for (k = 0; (q & 1) == 0; k++)
1218 q >>= 1;
1220 uintmax_t a = 2;
1221 binv (ni, n); /* ni <- 1/n mod B */
1222 redcify (one, 1, n);
1223 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1225 /* Perform a Miller-Rabin test, finds most composites quickly. */
1226 if (!millerrabin (n, ni, a_prim, q, k, one))
1227 return false;
1229 if (flag_prove_primality)
1231 /* Factor n-1 for Lucas. */
1232 factor (0, n - 1, &factors);
1235 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1236 number composite. */
1237 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1239 if (flag_prove_primality)
1241 is_prime = true;
1242 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1244 is_prime
1245 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1248 else
1250 /* After enough Miller-Rabin runs, be content. */
1251 is_prime = (r == MR_REPS - 1);
1254 if (is_prime)
1255 return true;
1257 a += primes_diff[r]; /* Establish new base. */
1259 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1260 on most processors, since it avoids udiv_qrnnd. If we go down the
1261 udiv_qrnnd_preinv path, this code should be replaced. */
1263 uintmax_t s1, s0;
1264 umul_ppmm (s1, s0, one, a);
1265 if (LIKELY (s1 == 0))
1266 a_prim = s0 % n;
1267 else
1269 uintmax_t dummy ATTRIBUTE_UNUSED;
1270 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1274 if (!millerrabin (n, ni, a_prim, q, k, one))
1275 return false;
1278 error (0, 0, _("Lucas prime test failure. This should not happen"));
1279 abort ();
1282 static bool
1283 prime2_p (uintmax_t n1, uintmax_t n0)
1285 uintmax_t q[2], nm1[2];
1286 uintmax_t a_prim[2];
1287 uintmax_t one[2];
1288 uintmax_t na[2];
1289 uintmax_t ni;
1290 unsigned int k;
1291 struct factors factors;
1293 if (n1 == 0)
1294 return prime_p (n0);
1296 nm1[1] = n1 - (n0 == 0);
1297 nm1[0] = n0 - 1;
1298 if (nm1[0] == 0)
1300 count_trailing_zeros (k, nm1[1]);
1302 q[0] = nm1[1] >> k;
1303 q[1] = 0;
1304 k += W_TYPE_SIZE;
1306 else
1308 count_trailing_zeros (k, nm1[0]);
1309 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1312 uintmax_t a = 2;
1313 binv (ni, n0);
1314 redcify2 (one[1], one[0], 1, n1, n0);
1315 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1317 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1318 na[0] = n0;
1319 na[1] = n1;
1321 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1322 return false;
1324 if (flag_prove_primality)
1326 /* Factor n-1 for Lucas. */
1327 factor (nm1[1], nm1[0], &factors);
1330 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1331 number composite. */
1332 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1334 bool is_prime;
1335 uintmax_t e[2], y[2];
1337 if (flag_prove_primality)
1339 is_prime = true;
1340 if (factors.plarge[1])
1342 uintmax_t pi;
1343 binv (pi, factors.plarge[0]);
1344 e[0] = pi * nm1[0];
1345 e[1] = 0;
1346 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1347 is_prime = (y[0] != one[0] || y[1] != one[1]);
1349 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1351 /* FIXME: We always have the factor 2. Do we really need to
1352 handle it here? We have done the same powering as part
1353 of millerrabin. */
1354 if (factors.p[i] == 2)
1355 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1356 else
1357 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1358 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1359 is_prime = (y[0] != one[0] || y[1] != one[1]);
1362 else
1364 /* After enough Miller-Rabin runs, be content. */
1365 is_prime = (r == MR_REPS - 1);
1368 if (is_prime)
1369 return true;
1371 a += primes_diff[r]; /* Establish new base. */
1372 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1374 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1375 return false;
1378 error (0, 0, _("Lucas prime test failure. This should not happen"));
1379 abort ();
1382 #if HAVE_GMP
1383 static bool
1384 mp_prime_p (mpz_t n)
1386 bool is_prime;
1387 mpz_t q, a, nm1, tmp;
1388 struct mp_factors factors;
1390 if (mpz_cmp_ui (n, 1) <= 0)
1391 return false;
1393 /* We have already casted out small primes. */
1394 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1395 return true;
1397 mpz_inits (q, a, nm1, tmp, NULL);
1399 /* Precomputation for Miller-Rabin. */
1400 mpz_sub_ui (nm1, n, 1);
1402 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1403 unsigned long int k = mpz_scan1 (nm1, 0);
1404 mpz_tdiv_q_2exp (q, nm1, k);
1406 mpz_set_ui (a, 2);
1408 /* Perform a Miller-Rabin test, finds most composites quickly. */
1409 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1411 is_prime = false;
1412 goto ret2;
1415 if (flag_prove_primality)
1417 /* Factor n-1 for Lucas. */
1418 mpz_set (tmp, nm1);
1419 mp_factor (tmp, &factors);
1422 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1423 number composite. */
1424 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1426 if (flag_prove_primality)
1428 is_prime = true;
1429 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1431 mpz_divexact (tmp, nm1, factors.p[i]);
1432 mpz_powm (tmp, a, tmp, n);
1433 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1436 else
1438 /* After enough Miller-Rabin runs, be content. */
1439 is_prime = (r == MR_REPS - 1);
1442 if (is_prime)
1443 goto ret1;
1445 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1447 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1449 is_prime = false;
1450 goto ret1;
1454 error (0, 0, _("Lucas prime test failure. This should not happen"));
1455 abort ();
1457 ret1:
1458 if (flag_prove_primality)
1459 mp_factor_clear (&factors);
1460 ret2:
1461 mpz_clears (q, a, nm1, tmp, NULL);
1463 return is_prime;
1465 #endif
1467 static void
1468 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1469 struct factors *factors)
1471 uintmax_t x, z, y, P, t, ni, g;
1473 unsigned long int k = 1;
1474 unsigned long int l = 1;
1476 redcify (P, 1, n);
1477 addmod (x, P, P, n); /* i.e., redcify(2) */
1478 y = z = x;
1480 while (n != 1)
1482 assert (a < n);
1484 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1486 for (;;)
1490 x = mulredc (x, x, n, ni);
1491 addmod (x, x, a, n);
1493 submod (t, z, x, n);
1494 P = mulredc (P, t, n, ni);
1496 if (k % 32 == 1)
1498 if (gcd_odd (P, n) != 1)
1499 goto factor_found;
1500 y = x;
1503 while (--k != 0);
1505 z = x;
1506 k = l;
1507 l = 2 * l;
1508 for (unsigned long int i = 0; i < k; i++)
1510 x = mulredc (x, x, n, ni);
1511 addmod (x, x, a, n);
1513 y = x;
1516 factor_found:
1519 y = mulredc (y, y, n, ni);
1520 addmod (y, y, a, n);
1522 submod (t, z, y, n);
1523 g = gcd_odd (t, n);
1525 while (g == 1);
1527 n = n / g;
1529 if (!prime_p (g))
1530 factor_using_pollard_rho (g, a + 1, factors);
1531 else
1532 factor_insert (factors, g);
1534 if (prime_p (n))
1536 factor_insert (factors, n);
1537 break;
1540 x = x % n;
1541 z = z % n;
1542 y = y % n;
1546 static void
1547 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1548 struct factors *factors)
1550 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1552 unsigned long int k = 1;
1553 unsigned long int l = 1;
1555 redcify2 (P1, P0, 1, n1, n0);
1556 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1557 y1 = z1 = x1;
1558 y0 = z0 = x0;
1560 while (n1 != 0 || n0 != 1)
1562 binv (ni, n0);
1564 for (;;)
1568 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1569 x1 = r1m;
1570 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1572 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1573 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1574 P1 = r1m;
1576 if (k % 32 == 1)
1578 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1579 if (g1 != 0 || g0 != 1)
1580 goto factor_found;
1581 y1 = x1; y0 = x0;
1584 while (--k != 0);
1586 z1 = x1; z0 = x0;
1587 k = l;
1588 l = 2 * l;
1589 for (unsigned long int i = 0; i < k; i++)
1591 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1592 x1 = r1m;
1593 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1595 y1 = x1; y0 = x0;
1598 factor_found:
1601 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1602 y1 = r1m;
1603 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1605 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1606 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1608 while (g1 == 0 && g0 == 1);
1610 if (g1 == 0)
1612 /* The found factor is one word. */
1613 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1615 if (!prime_p (g0))
1616 factor_using_pollard_rho (g0, a + 1, factors);
1617 else
1618 factor_insert (factors, g0);
1620 else
1622 /* The found factor is two words. This is highly unlikely, thus hard
1623 to trigger. Please be careful before you change this code! */
1624 uintmax_t ginv;
1626 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1627 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1628 n1 = 0; /* modulo B, ignoring the high divisor word. */
1630 if (!prime2_p (g1, g0))
1631 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1632 else
1633 factor_insert_large (factors, g1, g0);
1636 if (n1 == 0)
1638 if (prime_p (n0))
1640 factor_insert (factors, n0);
1641 break;
1644 factor_using_pollard_rho (n0, a, factors);
1645 return;
1648 if (prime2_p (n1, n0))
1650 factor_insert_large (factors, n1, n0);
1651 break;
1654 x0 = mod2 (&x1, x1, x0, n1, n0);
1655 z0 = mod2 (&z1, z1, z0, n1, n0);
1656 y0 = mod2 (&y1, y1, y0, n1, n0);
1660 #if HAVE_GMP
1661 static void
1662 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1663 struct mp_factors *factors)
1665 mpz_t x, z, y, P;
1666 mpz_t t, t2;
1668 debug ("[pollard-rho (%lu)] ", a);
1670 mpz_inits (t, t2, NULL);
1671 mpz_init_set_si (y, 2);
1672 mpz_init_set_si (x, 2);
1673 mpz_init_set_si (z, 2);
1674 mpz_init_set_ui (P, 1);
1676 unsigned long long int k = 1;
1677 unsigned long long int l = 1;
1679 while (mpz_cmp_ui (n, 1) != 0)
1681 for (;;)
1685 mpz_mul (t, x, x);
1686 mpz_mod (x, t, n);
1687 mpz_add_ui (x, x, a);
1689 mpz_sub (t, z, x);
1690 mpz_mul (t2, P, t);
1691 mpz_mod (P, t2, n);
1693 if (k % 32 == 1)
1695 mpz_gcd (t, P, n);
1696 if (mpz_cmp_ui (t, 1) != 0)
1697 goto factor_found;
1698 mpz_set (y, x);
1701 while (--k != 0);
1703 mpz_set (z, x);
1704 k = l;
1705 l = 2 * l;
1706 for (unsigned long long int i = 0; i < k; i++)
1708 mpz_mul (t, x, x);
1709 mpz_mod (x, t, n);
1710 mpz_add_ui (x, x, a);
1712 mpz_set (y, x);
1715 factor_found:
1718 mpz_mul (t, y, y);
1719 mpz_mod (y, t, n);
1720 mpz_add_ui (y, y, a);
1722 mpz_sub (t, z, y);
1723 mpz_gcd (t, t, n);
1725 while (mpz_cmp_ui (t, 1) == 0);
1727 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1729 if (!mp_prime_p (t))
1731 debug ("[composite factor--restarting pollard-rho] ");
1732 mp_factor_using_pollard_rho (t, a + 1, factors);
1734 else
1736 mp_factor_insert (factors, t);
1739 if (mp_prime_p (n))
1741 mp_factor_insert (factors, n);
1742 break;
1745 mpz_mod (x, x, n);
1746 mpz_mod (z, z, n);
1747 mpz_mod (y, y, n);
1750 mpz_clears (P, t2, t, z, x, y, NULL);
1752 #endif
1754 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1755 algorithm is replaced, consider also returning the remainder. */
1756 static uintmax_t _GL_ATTRIBUTE_CONST
1757 isqrt (uintmax_t n)
1759 uintmax_t x;
1760 unsigned c;
1761 if (n == 0)
1762 return 0;
1764 count_leading_zeros (c, n);
1766 /* Make x > sqrt(n). This will be invariant through the loop. */
1767 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1769 for (;;)
1771 uintmax_t y = (x + n/x) / 2;
1772 if (y >= x)
1773 return x;
1775 x = y;
1779 static uintmax_t _GL_ATTRIBUTE_CONST
1780 isqrt2 (uintmax_t nh, uintmax_t nl)
1782 unsigned int shift;
1783 uintmax_t x;
1785 /* Ensures the remainder fits in an uintmax_t. */
1786 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1788 if (nh == 0)
1789 return isqrt (nl);
1791 count_leading_zeros (shift, nh);
1792 shift &= ~1;
1794 /* Make x > sqrt(n) */
1795 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1796 x <<= (W_TYPE_SIZE - shift) / 2;
1798 /* Do we need more than one iteration? */
1799 for (;;)
1801 uintmax_t r ATTRIBUTE_UNUSED;
1802 uintmax_t q, y;
1803 udiv_qrnnd (q, r, nh, nl, x);
1804 y = (x + q) / 2;
1806 if (y >= x)
1808 uintmax_t hi, lo;
1809 umul_ppmm (hi, lo, x + 1, x + 1);
1810 assert (gt2 (hi, lo, nh, nl));
1812 umul_ppmm (hi, lo, x, x);
1813 assert (ge2 (nh, nl, hi, lo));
1814 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1815 assert (hi == 0);
1817 return x;
1820 x = y;
1824 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1825 #define MAGIC64 ((uint64_t) 0x0202021202030213ULL)
1826 #define MAGIC63 ((uint64_t) 0x0402483012450293ULL)
1827 #define MAGIC65 ((uint64_t) 0x218a019866014613ULL)
1828 #define MAGIC11 0x23b
1830 /* Return the square root if the input is a square, otherwise 0. */
1831 static uintmax_t _GL_ATTRIBUTE_CONST
1832 is_square (uintmax_t x)
1834 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1835 computing the square root. */
1836 if (((MAGIC64 >> (x & 63)) & 1)
1837 && ((MAGIC63 >> (x % 63)) & 1)
1838 /* Both 0 and 64 are squares mod (65) */
1839 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1840 && ((MAGIC11 >> (x % 11) & 1)))
1842 uintmax_t r = isqrt (x);
1843 if (r*r == x)
1844 return r;
1846 return 0;
1849 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1850 static const unsigned short invtab[0x81] =
1852 0x200,
1853 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1854 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1855 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1856 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1857 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1858 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1859 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1860 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1861 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1862 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1863 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1864 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1865 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1866 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1867 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1868 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1871 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1872 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1873 #define div_smallq(q, r, u, d) \
1874 do { \
1875 if ((u) / 0x40 < (d)) \
1877 int _cnt; \
1878 uintmax_t _dinv, _mask, _q, _r; \
1879 count_leading_zeros (_cnt, (d)); \
1880 _r = (u); \
1881 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1883 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1884 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1886 else \
1888 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1889 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1891 _r -= _q*(d); \
1893 _mask = -(uintmax_t) (_r >= (d)); \
1894 (r) = _r - (_mask & (d)); \
1895 (q) = _q - _mask; \
1896 assert ( (q) * (d) + (r) == u); \
1898 else \
1900 uintmax_t _q = (u) / (d); \
1901 (r) = (u) - _q * (d); \
1902 (q) = _q; \
1904 } while (0)
1906 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1907 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1908 with 3025 = 55^2.
1910 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1911 representing G_0 = (-55, 2*4652, 8653).
1913 In the notation of the paper:
1915 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1919 t_0 = floor([q_0 + R_0] / S0) = 1
1920 R_1 = t_0 * S_0 - R_0 = 4001
1921 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1924 /* Multipliers, in order of efficiency:
1925 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1926 0.7317 3*5*7 = 105 = 1
1927 0.7820 3*5*11 = 165 = 1
1928 0.7872 3*5 = 15 = 3
1929 0.8101 3*7*11 = 231 = 3
1930 0.8155 3*7 = 21 = 1
1931 0.8284 5*7*11 = 385 = 1
1932 0.8339 5*7 = 35 = 3
1933 0.8716 3*11 = 33 = 1
1934 0.8774 3 = 3 = 3
1935 0.8913 5*11 = 55 = 3
1936 0.8972 5 = 5 = 1
1937 0.9233 7*11 = 77 = 1
1938 0.9295 7 = 7 = 3
1939 0.9934 11 = 11 = 3
1941 #define QUEUE_SIZE 50
1943 #if STAT_SQUFOF
1944 # define Q_FREQ_SIZE 50
1945 /* Element 0 keeps the total */
1946 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1947 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1948 #endif
1950 /* Return true on success. Expected to fail only for numbers
1951 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1952 static bool
1953 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1955 /* Uses algorithm and notation from
1957 SQUARE FORM FACTORIZATION
1958 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1960 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1963 static const unsigned int multipliers_1[] =
1964 { /* = 1 (mod 4) */
1965 105, 165, 21, 385, 33, 5, 77, 1, 0
1967 static const unsigned int multipliers_3[] =
1968 { /* = 3 (mod 4) */
1969 1155, 15, 231, 35, 3, 55, 7, 11, 0
1972 const unsigned int *m;
1974 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1976 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1977 return false;
1979 uintmax_t sqrt_n = isqrt2 (n1, n0);
1981 if (n0 == sqrt_n * sqrt_n)
1983 uintmax_t p1, p0;
1985 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1986 assert (p0 == n0);
1988 if (n1 == p1)
1990 if (prime_p (sqrt_n))
1991 factor_insert_multiplicity (factors, sqrt_n, 2);
1992 else
1994 struct factors f;
1996 f.nfactors = 0;
1997 if (!factor_using_squfof (0, sqrt_n, &f))
1999 /* Try pollard rho instead */
2000 factor_using_pollard_rho (sqrt_n, 1, &f);
2002 /* Duplicate the new factors */
2003 for (unsigned int i = 0; i < f.nfactors; i++)
2004 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2006 return true;
2010 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2011 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2012 *m; m++)
2014 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2015 unsigned int i;
2016 unsigned int mu = *m;
2017 unsigned int qpos = 0;
2019 assert (mu * n0 % 4 == 3);
2021 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2022 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2023 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2024 mu^3 < n.
2026 However, this seems insufficient: With n = 37243139 and mu =
2027 105, we get a trivial factor, from the square 38809 = 197^2,
2028 without any corresponding Q earlier in the iteration.
2030 Requiring 64 mu^3 < n seems sufficient. */
2031 if (n1 == 0)
2033 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2034 continue;
2036 else
2038 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2039 continue;
2041 umul_ppmm (Dh, Dl, n0, mu);
2042 Dh += n1 * mu;
2044 assert (Dl % 4 != 1);
2045 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2047 S = isqrt2 (Dh, Dl);
2049 Q1 = 1;
2050 P = S;
2052 /* Square root remainder fits in one word, so ignore high part. */
2053 Q = Dl - P*P;
2054 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2055 L = isqrt (2*S);
2056 B = 2*L;
2057 L1 = mu * 2 * L;
2059 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2060 4 D. */
2062 for (i = 0; i <= B; i++)
2064 uintmax_t q, P1, t, rem;
2066 div_smallq (q, rem, S+P, Q);
2067 P1 = S - rem; /* P1 = q*Q - P */
2069 #if STAT_SQUFOF
2070 assert (q > 0);
2071 q_freq[0]++;
2072 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2073 #endif
2075 if (Q <= L1)
2077 uintmax_t g = Q;
2079 if ( (Q & 1) == 0)
2080 g /= 2;
2082 g /= gcd_odd (g, mu);
2084 if (g <= L)
2086 if (qpos >= QUEUE_SIZE)
2087 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2088 queue[qpos].Q = g;
2089 queue[qpos].P = P % g;
2090 qpos++;
2094 /* I think the difference can be either sign, but mod
2095 2^W_TYPE_SIZE arithmetic should be fine. */
2096 t = Q1 + q * (P - P1);
2097 Q1 = Q;
2098 Q = t;
2099 P = P1;
2101 if ( (i & 1) == 0)
2103 uintmax_t r = is_square (Q);
2104 if (r)
2106 for (unsigned int j = 0; j < qpos; j++)
2108 if (queue[j].Q == r)
2110 if (r == 1)
2111 /* Traversed entire cycle. */
2112 goto next_multiplier;
2114 /* Need the absolute value for divisibility test. */
2115 if (P >= queue[j].P)
2116 t = P - queue[j].P;
2117 else
2118 t = queue[j].P - P;
2119 if (t % r == 0)
2121 /* Delete entries up to and including entry
2122 j, which matched. */
2123 memmove (queue, queue + j + 1,
2124 (qpos - j - 1) * sizeof (queue[0]));
2125 qpos -= (j + 1);
2127 goto next_i;
2131 /* We have found a square form, which should give a
2132 factor. */
2133 Q1 = r;
2134 assert (S >= P); /* What signs are possible? */
2135 P += r * ((S - P) / r);
2137 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2138 for the case D = 2N. */
2139 /* Compute Q = (D - P*P) / Q1, but we need double
2140 precision. */
2141 uintmax_t hi, lo;
2142 umul_ppmm (hi, lo, P, P);
2143 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2144 udiv_qrnnd (Q, rem, hi, lo, Q1);
2145 assert (rem == 0);
2147 for (;;)
2149 /* Note: There appears to by a typo in the paper,
2150 Step 4a in the algorithm description says q <--
2151 floor([S+P]/\hat Q), but looking at the equations
2152 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2153 (In this code, \hat Q is Q1). */
2154 div_smallq (q, rem, S+P, Q);
2155 P1 = S - rem; /* P1 = q*Q - P */
2157 #if STAT_SQUFOF
2158 q_freq[0]++;
2159 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2160 #endif
2161 if (P == P1)
2162 break;
2163 t = Q1 + q * (P - P1);
2164 Q1 = Q;
2165 Q = t;
2166 P = P1;
2169 if ( (Q & 1) == 0)
2170 Q /= 2;
2171 Q /= gcd_odd (Q, mu);
2173 assert (Q > 1 && (n1 || Q < n0));
2175 if (prime_p (Q))
2176 factor_insert (factors, Q);
2177 else if (!factor_using_squfof (0, Q, factors))
2178 factor_using_pollard_rho (Q, 2, factors);
2180 divexact_21 (n1, n0, n1, n0, Q);
2182 if (prime2_p (n1, n0))
2183 factor_insert_large (factors, n1, n0);
2184 else
2186 if (!factor_using_squfof (n1, n0, factors))
2188 if (n1 == 0)
2189 factor_using_pollard_rho (n0, 1, factors);
2190 else
2191 factor_using_pollard_rho2 (n1, n0, 1, factors);
2195 return true;
2198 next_i:;
2200 next_multiplier:;
2202 return false;
2205 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2206 results in FACTORS. Use the algorithm selected by the global ALG. */
2207 static void
2208 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2210 factors->nfactors = 0;
2211 factors->plarge[1] = 0;
2213 if (t1 == 0 && t0 < 2)
2214 return;
2216 t0 = factor_using_division (&t1, t1, t0, factors);
2218 if (t1 == 0 && t0 < 2)
2219 return;
2221 if (prime2_p (t1, t0))
2222 factor_insert_large (factors, t1, t0);
2223 else
2225 if (alg == ALG_SQUFOF)
2226 if (factor_using_squfof (t1, t0, factors))
2227 return;
2229 if (t1 == 0)
2230 factor_using_pollard_rho (t0, 1, factors);
2231 else
2232 factor_using_pollard_rho2 (t1, t0, 1, factors);
2236 #if HAVE_GMP
2237 /* Use Pollard-rho to compute the prime factors of
2238 arbitrary-precision T, and put the results in FACTORS. */
2239 static void
2240 mp_factor (mpz_t t, struct mp_factors *factors)
2242 mp_factor_init (factors);
2244 if (mpz_sgn (t) != 0)
2246 mp_factor_using_division (t, factors);
2248 if (mpz_cmp_ui (t, 1) != 0)
2250 debug ("[is number prime?] ");
2251 if (mp_prime_p (t))
2252 mp_factor_insert (factors, t);
2253 else
2254 mp_factor_using_pollard_rho (t, 1, factors);
2258 #endif
2260 static strtol_error
2261 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2263 unsigned int lo_carry;
2264 uintmax_t hi = 0, lo = 0;
2266 strtol_error err = LONGINT_INVALID;
2268 /* Skip initial spaces and '+'. */
2269 for (;;)
2271 char c = *s;
2272 if (c == ' ')
2273 s++;
2274 else if (c == '+')
2276 s++;
2277 break;
2279 else
2280 break;
2283 /* Initial scan for invalid digits. */
2284 const char *p = s;
2285 for (;;)
2287 unsigned int c = *p++;
2288 if (c == 0)
2289 break;
2291 if (UNLIKELY (!ISDIGIT (c)))
2293 err = LONGINT_INVALID;
2294 break;
2297 err = LONGINT_OK; /* we've seen at least one valid digit */
2300 for (;err == LONGINT_OK;)
2302 unsigned int c = *s++;
2303 if (c == 0)
2304 break;
2306 c -= '0';
2308 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2310 err = LONGINT_OVERFLOW;
2311 break;
2313 hi = 10 * hi;
2315 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2316 lo_carry += 10 * lo < 2 * lo;
2318 lo = 10 * lo;
2319 lo += c;
2321 lo_carry += lo < c;
2322 hi += lo_carry;
2323 if (UNLIKELY (hi < lo_carry))
2325 err = LONGINT_OVERFLOW;
2326 break;
2330 *hip = hi;
2331 *lop = lo;
2333 return err;
2336 static void
2337 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2339 uintmax_t q, r;
2341 if (t1 == 0)
2342 printf ("%"PRIuMAX, t0);
2343 else
2345 /* Use very plain code here since it seems hard to write fast code
2346 without assuming a specific word size. */
2347 q = t1 / 1000000000;
2348 r = t1 % 1000000000;
2349 udiv_qrnnd (t0, r, r, t0, 1000000000);
2350 print_uintmaxes (q, t0);
2351 printf ("%09u", (int) r);
2355 /* Single-precision factoring */
2356 static void
2357 print_factors_single (uintmax_t t1, uintmax_t t0)
2359 struct factors factors;
2361 print_uintmaxes (t1, t0);
2362 putchar (':');
2364 factor (t1, t0, &factors);
2366 for (unsigned int j = 0; j < factors.nfactors; j++)
2367 for (unsigned int k = 0; k < factors.e[j]; k++)
2369 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2370 putchar (' ');
2371 fputs (umaxtostr (factors.p[j], buf), stdout);
2374 if (factors.plarge[1])
2376 putchar (' ');
2377 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2379 putchar ('\n');
2382 /* Emit the factors of the indicated number. If we have the option of using
2383 either algorithm, we select on the basis of the length of the number.
2384 For longer numbers, we prefer the MP algorithm even if the native algorithm
2385 has enough digits, because the algorithm is better. The turnover point
2386 depends on the value. */
2387 static bool
2388 print_factors (const char *input)
2390 uintmax_t t1, t0;
2392 /* Try converting the number to one or two words. If it fails, use GMP or
2393 print an error message. The 2nd condition checks that the most
2394 significant bit of the two-word number is clear, in a typesize neutral
2395 way. */
2396 strtol_error err = strto2uintmax (&t1, &t0, input);
2398 switch (err)
2400 case LONGINT_OK:
2401 if (((t1 << 1) >> 1) == t1)
2403 debug ("[%s]", _("using single-precision arithmetic"));
2404 print_factors_single (t1, t0);
2405 return true;
2407 break;
2409 case LONGINT_OVERFLOW:
2410 /* Try GMP. */
2411 break;
2413 default:
2414 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2415 return false;
2418 #if HAVE_GMP
2419 debug ("[%s]", _("using arbitrary-precision arithmetic"));
2420 mpz_t t;
2421 struct mp_factors factors;
2423 mpz_init_set_str (t, input, 10);
2425 gmp_printf ("%Zd:", t);
2426 mp_factor (t, &factors);
2428 for (unsigned int j = 0; j < factors.nfactors; j++)
2429 for (unsigned int k = 0; k < factors.e[j]; k++)
2430 gmp_printf (" %Zd", factors.p[j]);
2432 mp_factor_clear (&factors);
2433 mpz_clear (t);
2434 putchar ('\n');
2435 return true;
2436 #else
2437 error (0, 0, _("%s is too large"), quote (input));
2438 return false;
2439 #endif
2442 void
2443 usage (int status)
2445 if (status != EXIT_SUCCESS)
2446 emit_try_help ();
2447 else
2449 printf (_("\
2450 Usage: %s [NUMBER]...\n\
2451 or: %s OPTION\n\
2453 program_name, program_name);
2454 fputs (_("\
2455 Print the prime factors of each specified integer NUMBER. If none\n\
2456 are specified on the command line, read them from standard input.\n\
2458 "), stdout);
2459 fputs (HELP_OPTION_DESCRIPTION, stdout);
2460 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2461 emit_ancillary_info ();
2463 exit (status);
2466 static bool
2467 do_stdin (void)
2469 bool ok = true;
2470 token_buffer tokenbuffer;
2472 init_tokenbuffer (&tokenbuffer);
2474 while (true)
2476 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2477 &tokenbuffer);
2478 if (token_length == (size_t) -1)
2479 break;
2480 ok &= print_factors (tokenbuffer.buffer);
2482 free (tokenbuffer.buffer);
2484 return ok;
2488 main (int argc, char **argv)
2490 initialize_main (&argc, &argv);
2491 set_program_name (argv[0]);
2492 setlocale (LC_ALL, "");
2493 bindtextdomain (PACKAGE, LOCALEDIR);
2494 textdomain (PACKAGE);
2496 atexit (close_stdout);
2498 alg = ALG_POLLARD_RHO; /* Default to Pollard rho */
2500 int c;
2501 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2503 switch (c)
2505 case VERBOSE_OPTION:
2506 verbose = 1;
2507 break;
2509 case 's':
2510 alg = ALG_SQUFOF;
2511 break;
2513 case 'w':
2514 flag_prove_primality = false;
2515 break;
2517 case_GETOPT_HELP_CHAR;
2519 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2521 default:
2522 usage (EXIT_FAILURE);
2526 #if STAT_SQUFOF
2527 if (alg == ALG_SQUFOF)
2528 memset (q_freq, 0, sizeof (q_freq));
2529 #endif
2531 bool ok;
2532 if (argc <= optind)
2533 ok = do_stdin ();
2534 else
2536 ok = true;
2537 for (int i = optind; i < argc; i++)
2538 if (! print_factors (argv[i]))
2539 ok = false;
2542 #if STAT_SQUFOF
2543 if (alg == ALG_SQUFOF && q_freq[0] > 0)
2545 double acc_f;
2546 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2547 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2549 double f = (double) q_freq[i] / q_freq[0];
2550 acc_f += f;
2551 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2552 100.0 * f, 100.0 * acc_f);
2555 #endif
2557 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);