build: avoid compile warnings in factor.c on some systems
[coreutils.git] / src / factor.c
blob539a686e2e11d63e5bc75ef13d615f797b86f779
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synched 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or the SQUFOF algorithm, checking status of found factors
54 again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 #include <config.h>
88 #include <getopt.h>
89 #include <stdio.h>
90 #if HAVE_GMP
91 # include <gmp.h>
92 # if !HAVE_DECL_MPZ_INITS
93 # include <stdarg.h>
94 # endif
95 #endif
97 #include <assert.h>
99 #include "system.h"
100 #include "error.h"
101 #include "quote.h"
102 #include "readtokens.h"
103 #include "xstrtol.h"
105 /* The official name of this program (e.g., no 'g' prefix). */
106 #define PROGRAM_NAME "factor"
108 #define AUTHORS \
109 proper_name ("Paul Rubin"), \
110 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
111 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
113 /* Token delimiters when reading from a file. */
114 #define DELIM "\n\t "
116 #ifndef STAT_SQUFOF
117 # define STAT_SQUFOF 0
118 #endif
120 #ifndef USE_LONGLONG_H
121 # define USE_LONGLONG_H 1
122 #endif
124 #if USE_LONGLONG_H
126 /* Make definitions for longlong.h to make it do what it can do for us */
127 # define W_TYPE_SIZE 64 /* bitcount for uintmax_t */
128 # define UWtype uintmax_t
129 # define UHWtype unsigned long int
130 # undef UDWtype
131 # if HAVE_ATTRIBUTE_MODE
132 typedef unsigned int UQItype __attribute__ ((mode (QI)));
133 typedef int SItype __attribute__ ((mode (SI)));
134 typedef unsigned int USItype __attribute__ ((mode (SI)));
135 typedef int DItype __attribute__ ((mode (DI)));
136 typedef unsigned int UDItype __attribute__ ((mode (DI)));
137 # else
138 typedef unsigned char UQItype;
139 typedef long SItype;
140 typedef unsigned long int USItype;
141 # if HAVE_LONG_LONG
142 typedef long long int DItype;
143 typedef unsigned long long int UDItype;
144 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
145 typedef long int DItype;
146 typedef unsigned long int UDItype;
147 # endif
148 # endif
149 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
150 # define ASSERT(x) /* FIXME make longlong.h really standalone */
151 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
152 # ifndef __GMP_GNUC_PREREQ
153 # define __GMP_GNUC_PREREQ(a,b) 1
154 # endif
155 # if _ARCH_PPC
156 # define HAVE_HOST_CPU_FAMILY_powerpc 1
157 # endif
158 # include "longlong.h"
159 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
160 const unsigned char factor_clz_tab[129] =
162 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
163 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
164 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
165 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
168 # endif
170 #else /* not USE_LONGLONG_H */
172 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
173 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
174 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
175 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
177 #endif
179 #if !defined __clz_tab && !defined UHWtype
180 /* Without this seemingly useless conditional, gcc -Wunused-macros
181 warns that each of the two tested macros is unused on Fedora 18.
182 FIXME: this is just an ugly band-aid. Fix it properly. */
183 #endif
185 enum alg_type { ALG_POLLARD_RHO = 1, ALG_SQUFOF = 2 };
187 static enum alg_type alg;
189 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
190 #define MAX_NFACTS 26
192 enum
194 VERBOSE_OPTION = CHAR_MAX + 1
197 static struct option const long_options[] =
199 {"verbose", no_argument, NULL, VERBOSE_OPTION},
200 {GETOPT_HELP_OPTION_DECL},
201 {GETOPT_VERSION_OPTION_DECL},
202 {NULL, 0, NULL, 0}
205 struct factors
207 uintmax_t plarge[2]; /* Can have a single large factor */
208 uintmax_t p[MAX_NFACTS];
209 unsigned char e[MAX_NFACTS];
210 unsigned char nfactors;
213 #if HAVE_GMP
214 struct mp_factors
216 mpz_t *p;
217 unsigned long int *e;
218 unsigned long int nfactors;
220 #endif
222 static void factor (uintmax_t, uintmax_t, struct factors *);
224 #ifndef umul_ppmm
225 # define umul_ppmm(w1, w0, u, v) \
226 do { \
227 uintmax_t __x0, __x1, __x2, __x3; \
228 unsigned long int __ul, __vl, __uh, __vh; \
229 uintmax_t __u = (u), __v = (v); \
231 __ul = __ll_lowpart (__u); \
232 __uh = __ll_highpart (__u); \
233 __vl = __ll_lowpart (__v); \
234 __vh = __ll_highpart (__v); \
236 __x0 = (uintmax_t) __ul * __vl; \
237 __x1 = (uintmax_t) __ul * __vh; \
238 __x2 = (uintmax_t) __uh * __vl; \
239 __x3 = (uintmax_t) __uh * __vh; \
241 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
242 __x1 += __x2; /* but this indeed can */ \
243 if (__x1 < __x2) /* did we get it? */ \
244 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
246 (w1) = __x3 + __ll_highpart (__x1); \
247 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
248 } while (0)
249 #endif
251 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
252 /* Define our own, not needing normalization. This function is
253 currently not performance critical, so keep it simple. Similar to
254 the mod macro below. */
255 # undef udiv_qrnnd
256 # define udiv_qrnnd(q, r, n1, n0, d) \
257 do { \
258 uintmax_t __d1, __d0, __q, __r1, __r0; \
260 assert ((n1) < (d)); \
261 __d1 = (d); __d0 = 0; \
262 __r1 = (n1); __r0 = (n0); \
263 __q = 0; \
264 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
266 rsh2 (__d1, __d0, __d1, __d0, 1); \
267 __q <<= 1; \
268 if (ge2 (__r1, __r0, __d1, __d0)) \
270 __q++; \
271 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
274 (r) = __r0; \
275 (q) = __q; \
276 } while (0)
277 #endif
279 #if !defined add_ssaaaa
280 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
281 do { \
282 uintmax_t _add_x; \
283 _add_x = (al) + (bl); \
284 (sh) = (ah) + (bh) + (_add_x < (al)); \
285 (sl) = _add_x; \
286 } while (0)
287 #endif
289 #define rsh2(rh, rl, ah, al, cnt) \
290 do { \
291 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
292 (rh) = (ah) >> (cnt); \
293 } while (0)
295 #define lsh2(rh, rl, ah, al, cnt) \
296 do { \
297 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
298 (rl) = (al) << (cnt); \
299 } while (0)
301 #define ge2(ah, al, bh, bl) \
302 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
304 #define gt2(ah, al, bh, bl) \
305 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
307 #ifndef sub_ddmmss
308 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
309 do { \
310 uintmax_t _cy; \
311 _cy = (al) < (bl); \
312 (rl) = (al) - (bl); \
313 (rh) = (ah) - (bh) - _cy; \
314 } while (0)
315 #endif
317 #ifndef count_leading_zeros
318 # define count_leading_zeros(count, x) do { \
319 uintmax_t __clz_x = (x); \
320 unsigned int __clz_c; \
321 for (__clz_c = 0; \
322 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
323 __clz_c += 8) \
324 __clz_x <<= 8; \
325 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
326 __clz_x <<= 1; \
327 (count) = __clz_c; \
328 } while (0)
329 #endif
331 #ifndef count_trailing_zeros
332 # define count_trailing_zeros(count, x) do { \
333 uintmax_t __ctz_x = (x); \
334 unsigned int __ctz_c = 0; \
335 while ((__ctz_x & 1) == 0) \
337 __ctz_x >>= 1; \
338 __ctz_c++; \
340 (count) = __ctz_c; \
341 } while (0)
342 #endif
344 /* Requires that a < n and b <= n */
345 #define submod(r,a,b,n) \
346 do { \
347 uintmax_t _t = - (uintmax_t) (a < b); \
348 (r) = ((n) & _t) + (a) - (b); \
349 } while (0)
351 #define addmod(r,a,b,n) \
352 submod ((r), (a), ((n) - (b)), (n))
354 /* Modular two-word addition and subtraction. For performance reasons, the
355 most significant bit of n1 must be clear. The destination variables must be
356 distinct from the mod operand. */
357 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
358 do { \
359 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
360 if (ge2 ((r1), (r0), (n1), (n0))) \
361 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
362 } while (0)
363 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
364 do { \
365 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
366 if ((intmax_t) (r1) < 0) \
367 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
368 } while (0)
370 #define HIGHBIT_TO_MASK(x) \
371 (((intmax_t)-1 >> 1) < 0 \
372 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
373 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
374 ? UINTMAX_MAX : (uintmax_t) 0))
376 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
377 Requires that d1 != 0. */
378 static uintmax_t
379 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
381 int cntd, cnta;
383 assert (d1 != 0);
385 if (a1 == 0)
387 *r1 = 0;
388 return a0;
391 count_leading_zeros (cntd, d1);
392 count_leading_zeros (cnta, a1);
393 int cnt = cntd - cnta;
394 lsh2 (d1, d0, d1, d0, cnt);
395 for (int i = 0; i < cnt; i++)
397 if (ge2 (a1, a0, d1, d0))
398 sub_ddmmss (a1, a0, a1, a0, d1, d0);
399 rsh2 (d1, d0, d1, d0, 1);
402 *r1 = a1;
403 return a0;
406 static uintmax_t _GL_ATTRIBUTE_CONST
407 gcd_odd (uintmax_t a, uintmax_t b)
409 if ( (b & 1) == 0)
411 uintmax_t t = b;
412 b = a;
413 a = t;
415 if (a == 0)
416 return b;
418 /* Take out least significant one bit, to make room for sign */
419 b >>= 1;
421 for (;;)
423 uintmax_t t;
424 uintmax_t bgta;
426 while ((a & 1) == 0)
427 a >>= 1;
428 a >>= 1;
430 t = a - b;
431 if (t == 0)
432 return (a << 1) + 1;
434 bgta = HIGHBIT_TO_MASK (t);
436 /* b <-- min (a, b) */
437 b += (bgta & t);
439 /* a <-- |a - b| */
440 a = (t ^ bgta) - bgta;
444 static uintmax_t
445 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
447 while ((a0 & 1) == 0)
448 rsh2 (a1, a0, a1, a0, 1);
449 while ((b0 & 1) == 0)
450 rsh2 (b1, b0, b1, b0, 1);
452 for (;;)
454 if ((b1 | a1) == 0)
456 *r1 = 0;
457 return gcd_odd (b0, a0);
460 if (gt2 (a1, a0, b1, b0))
462 sub_ddmmss (a1, a0, a1, a0, b1, b0);
464 rsh2 (a1, a0, a1, a0, 1);
465 while ((a0 & 1) == 0);
467 else if (gt2 (b1, b0, a1, a0))
469 sub_ddmmss (b1, b0, b1, b0, a1, a0);
471 rsh2 (b1, b0, b1, b0, 1);
472 while ((b0 & 1) == 0);
474 else
475 break;
478 *r1 = a1;
479 return a0;
482 static void
483 factor_insert_multiplicity (struct factors *factors,
484 uintmax_t prime, unsigned int m)
486 unsigned int nfactors = factors->nfactors;
487 uintmax_t *p = factors->p;
488 unsigned char *e = factors->e;
490 /* Locate position for insert new or increment e. */
491 int i;
492 for (i = nfactors - 1; i >= 0; i--)
494 if (p[i] <= prime)
495 break;
498 if (i < 0 || p[i] != prime)
500 for (int j = nfactors - 1; j > i; j--)
502 p[j + 1] = p[j];
503 e[j + 1] = e[j];
505 p[i + 1] = prime;
506 e[i + 1] = m;
507 factors->nfactors = nfactors + 1;
509 else
511 e[i] += m;
515 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
517 static void
518 factor_insert_large (struct factors *factors,
519 uintmax_t p1, uintmax_t p0)
521 if (p1 > 0)
523 assert (factors->plarge[1] == 0);
524 factors->plarge[0] = p0;
525 factors->plarge[1] = p1;
527 else
528 factor_insert (factors, p0);
531 #if HAVE_GMP
533 # if !HAVE_DECL_MPZ_INITS
535 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
536 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
538 static void
539 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
541 va_list ap;
543 va_start (ap, mpz_single_init);
545 mpz_t *mpz;
546 while ((mpz = va_arg (ap, mpz_t *)))
547 mpz_single_init (*mpz);
549 va_end (ap);
551 # endif
553 static void mp_factor (mpz_t, struct mp_factors *);
555 static void
556 mp_factor_init (struct mp_factors *factors)
558 factors->p = xmalloc (1);
559 factors->e = xmalloc (1);
560 factors->nfactors = 0;
563 static void
564 mp_factor_clear (struct mp_factors *factors)
566 for (unsigned int i = 0; i < factors->nfactors; i++)
567 mpz_clear (factors->p[i]);
569 free (factors->p);
570 free (factors->e);
573 static void
574 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
576 unsigned long int nfactors = factors->nfactors;
577 mpz_t *p = factors->p;
578 unsigned long int *e = factors->e;
579 long i;
581 /* Locate position for insert new or increment e. */
582 for (i = nfactors - 1; i >= 0; i--)
584 if (mpz_cmp (p[i], prime) <= 0)
585 break;
588 if (i < 0 || mpz_cmp (p[i], prime) != 0)
590 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
591 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
593 mpz_init (p[nfactors]);
594 for (long j = nfactors - 1; j > i; j--)
596 mpz_set (p[j + 1], p[j]);
597 e[j + 1] = e[j];
599 mpz_set (p[i + 1], prime);
600 e[i + 1] = 1;
602 factors->p = p;
603 factors->e = e;
604 factors->nfactors = nfactors + 1;
606 else
608 e[i] += 1;
612 static void
613 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
615 mpz_t pz;
617 mpz_init_set_ui (pz, prime);
618 mp_factor_insert (factors, pz);
619 mpz_clear (pz);
621 #endif /* HAVE_GMP */
624 #define P(a,b,c,d) a,
625 static const unsigned char primes_diff[] = {
626 #include "primes.h"
627 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
629 #undef P
631 #define PRIMES_PTAB_ENTRIES \
632 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
634 #define P(a,b,c,d) b,
635 static const unsigned char primes_diff8[] = {
636 #include "primes.h"
637 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
639 #undef P
641 struct primes_dtab
643 uintmax_t binv, lim;
646 #define P(a,b,c,d) {c,d},
647 static const struct primes_dtab primes_dtab[] = {
648 #include "primes.h"
649 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
651 #undef P
653 /* This flag is honored only in the GMP code. */
654 static int verbose = 0;
656 /* Prove primality or run probabilistic tests. */
657 static bool flag_prove_primality = true;
659 /* Number of Miller-Rabin tests to run when not proving primality. */
660 #define MR_REPS 25
662 #ifdef __GNUC__
663 # define LIKELY(cond) __builtin_expect ((cond), 1)
664 # define UNLIKELY(cond) __builtin_expect ((cond), 0)
665 #else
666 # define LIKELY(cond) (cond)
667 # define UNLIKELY(cond) (cond)
668 #endif
670 static void
671 debug (char const *fmt, ...)
673 if (verbose)
675 va_list ap;
676 va_start (ap, fmt);
677 vfprintf (stderr, fmt, ap);
678 va_end (ap);
682 static void
683 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
684 unsigned int off)
686 for (unsigned int j = 0; j < off; j++)
687 p += primes_diff[i + j];
688 factor_insert (factors, p);
691 /* Trial division with odd primes uses the following trick.
693 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
694 consider the case t < B (this is the second loop below).
696 From our tables we get
698 binv = p^{-1} (mod B)
699 lim = floor ( (B-1) / p ).
701 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
702 (and all quotients in this range occur for some t).
704 Then t = q * p is true also (mod B), and p is invertible we get
706 q = t * binv (mod B).
708 Next, assume that t is *not* divisible by p. Since multiplication
709 by binv (mod B) is a one-to-one mapping,
711 t * binv (mod B) > lim,
713 because all the smaller values are already taken.
715 This can be summed up by saying that the function
717 q(t) = binv * t (mod B)
719 is a permutation of the range 0 <= t < B, with the curious property
720 that it maps the multiples of p onto the range 0 <= q <= lim, in
721 order, and the non-multiples of p onto the range lim < q < B.
724 static uintmax_t
725 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
726 struct factors *factors)
728 if (t0 % 2 == 0)
730 unsigned int cnt;
732 if (t0 == 0)
734 count_trailing_zeros (cnt, t1);
735 t0 = t1 >> cnt;
736 t1 = 0;
737 cnt += W_TYPE_SIZE;
739 else
741 count_trailing_zeros (cnt, t0);
742 rsh2 (t1, t0, t1, t0, cnt);
745 factor_insert_multiplicity (factors, 2, cnt);
748 uintmax_t p = 3;
749 unsigned int i;
750 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
752 for (;;)
754 uintmax_t q1, q0, hi, lo;
756 q0 = t0 * primes_dtab[i].binv;
757 umul_ppmm (hi, lo, q0, p);
758 if (hi > t1)
759 break;
760 hi = t1 - hi;
761 q1 = hi * primes_dtab[i].binv;
762 if (LIKELY (q1 > primes_dtab[i].lim))
763 break;
764 t1 = q1; t0 = q0;
765 factor_insert (factors, p);
767 p += primes_diff[i + 1];
769 if (t1p)
770 *t1p = t1;
772 #define DIVBLOCK(I) \
773 do { \
774 for (;;) \
776 q = t0 * pd[I].binv; \
777 if (LIKELY (q > pd[I].lim)) \
778 break; \
779 t0 = q; \
780 factor_insert_refind (factors, p, i + 1, I); \
782 } while (0)
784 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
786 uintmax_t q;
787 const struct primes_dtab *pd = &primes_dtab[i];
788 DIVBLOCK (0);
789 DIVBLOCK (1);
790 DIVBLOCK (2);
791 DIVBLOCK (3);
792 DIVBLOCK (4);
793 DIVBLOCK (5);
794 DIVBLOCK (6);
795 DIVBLOCK (7);
797 p += primes_diff8[i];
798 if (p * p > t0)
799 break;
802 return t0;
805 #if HAVE_GMP
806 static void
807 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
809 mpz_t q;
810 unsigned long int p;
812 debug ("[trial division] ");
814 mpz_init (q);
816 p = mpz_scan1 (t, 0);
817 mpz_div_2exp (t, t, p);
818 while (p)
820 mp_factor_insert_ui (factors, 2);
821 --p;
824 p = 3;
825 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
827 if (! mpz_divisible_ui_p (t, p))
829 p += primes_diff[i++];
830 if (mpz_cmp_ui (t, p * p) < 0)
831 break;
833 else
835 mpz_tdiv_q_ui (t, t, p);
836 mp_factor_insert_ui (factors, p);
840 mpz_clear (q);
842 #endif
844 /* Entry i contains (2i+1)^(-1) mod 2^8. */
845 static const unsigned char binvert_table[128] =
847 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
848 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
849 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
850 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
851 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
852 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
853 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
854 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
855 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
856 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
857 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
858 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
859 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
860 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
861 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
862 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
865 /* Compute n^(-1) mod B, using a Newton iteration. */
866 #define binv(inv,n) \
867 do { \
868 uintmax_t __n = (n); \
869 uintmax_t __inv; \
871 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
872 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
873 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
874 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
876 if (W_TYPE_SIZE > 64) \
878 int __invbits = 64; \
879 do { \
880 __inv = 2 * __inv - __inv * __inv * __n; \
881 __invbits *= 2; \
882 } while (__invbits < W_TYPE_SIZE); \
885 (inv) = __inv; \
886 } while (0)
888 /* q = u / d, assuming d|u. */
889 #define divexact_21(q1, q0, u1, u0, d) \
890 do { \
891 uintmax_t _di, _q0; \
892 binv (_di, (d)); \
893 _q0 = (u0) * _di; \
894 if ((u1) >= (d)) \
896 uintmax_t _p1, _p0; \
897 umul_ppmm (_p1, _p0, _q0, d); \
898 (q1) = ((u1) - _p1) * _di; \
899 (q0) = _q0; \
901 else \
903 (q0) = _q0; \
904 (q1) = 0; \
906 } while (0)
908 /* x B (mod n). */
909 #define redcify(r_prim, r, n) \
910 do { \
911 uintmax_t _redcify_q ATTRIBUTE_UNUSED; \
912 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
913 } while (0)
915 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
916 #define redcify2(r1, r0, x, n1, n0) \
917 do { \
918 uintmax_t _r1, _r0, _i; \
919 if ((x) < (n1)) \
921 _r1 = (x); _r0 = 0; \
922 _i = W_TYPE_SIZE; \
924 else \
926 _r1 = 0; _r0 = (x); \
927 _i = 2*W_TYPE_SIZE; \
929 while (_i-- > 0) \
931 lsh2 (_r1, _r0, _r1, _r0, 1); \
932 if (ge2 (_r1, _r0, (n1), (n0))) \
933 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
935 (r1) = _r1; \
936 (r0) = _r0; \
937 } while (0)
939 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
940 Both a and b must be in redc form, the result will be in redc form too. */
941 static inline uintmax_t
942 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
944 uintmax_t rh, rl, q, th, tl, xh;
946 umul_ppmm (rh, rl, a, b);
947 q = rl * mi;
948 umul_ppmm (th, tl, q, m);
949 xh = rh - th;
950 if (rh < th)
951 xh += m;
953 return xh;
956 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
957 Both a and b must be in redc form, the result will be in redc form too.
958 For performance reasons, the most significant bit of m must be clear. */
959 static uintmax_t
960 mulredc2 (uintmax_t *r1p,
961 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
962 uintmax_t m1, uintmax_t m0, uintmax_t mi)
964 uintmax_t r1, r0, q, p1, p0, t1, t0, s1, s0;
965 mi = -mi;
966 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
967 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
968 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
970 /* First compute a0 * <b1, b0> B^{-1}
971 +-----+
972 |a0 b0|
973 +--+--+--+
974 |a0 b1|
975 +--+--+--+
976 |q0 m0|
977 +--+--+--+
978 |q0 m1|
979 -+--+--+--+
980 |r1|r0| 0|
981 +--+--+--+
983 umul_ppmm (t1, t0, a0, b0);
984 umul_ppmm (r1, r0, a0, b1);
985 q = mi * t0;
986 umul_ppmm (p1, p0, q, m0);
987 umul_ppmm (s1, s0, q, m1);
988 r0 += (t0 != 0); /* Carry */
989 add_ssaaaa (r1, r0, r1, r0, 0, p1);
990 add_ssaaaa (r1, r0, r1, r0, 0, t1);
991 add_ssaaaa (r1, r0, r1, r0, s1, s0);
993 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
994 +-----+
995 |a1 b0|
996 +--+--+
997 |r1|r0|
998 +--+--+--+
999 |a1 b1|
1000 +--+--+--+
1001 |q1 m0|
1002 +--+--+--+
1003 |q1 m1|
1004 -+--+--+--+
1005 |r1|r0| 0|
1006 +--+--+--+
1008 umul_ppmm (t1, t0, a1, b0);
1009 umul_ppmm (s1, s0, a1, b1);
1010 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1011 q = mi * t0;
1012 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1013 umul_ppmm (p1, p0, q, m0);
1014 umul_ppmm (s1, s0, q, m1);
1015 r0 += (t0 != 0); /* Carry */
1016 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1017 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1018 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1020 if (ge2 (r1, r0, m1, m0))
1021 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1023 *r1p = r1;
1024 return r0;
1027 static uintmax_t _GL_ATTRIBUTE_CONST
1028 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1030 uintmax_t y = one;
1032 if (e & 1)
1033 y = b;
1035 while (e != 0)
1037 b = mulredc (b, b, n, ni);
1038 e >>= 1;
1040 if (e & 1)
1041 y = mulredc (y, b, n, ni);
1044 return y;
1047 static uintmax_t
1048 powm2 (uintmax_t *r1m,
1049 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1050 uintmax_t ni, const uintmax_t *one)
1052 uintmax_t r1, r0, b1, b0, n1, n0;
1053 unsigned int i;
1054 uintmax_t e;
1056 b0 = bp[0];
1057 b1 = bp[1];
1058 n0 = np[0];
1059 n1 = np[1];
1061 r0 = one[0];
1062 r1 = one[1];
1064 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1066 if (e & 1)
1068 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1069 r1 = *r1m;
1071 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1072 b1 = *r1m;
1074 for (e = ep[1]; e > 0; e >>= 1)
1076 if (e & 1)
1078 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1079 r1 = *r1m;
1081 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1082 b1 = *r1m;
1084 *r1m = r1;
1085 return r0;
1088 static bool _GL_ATTRIBUTE_CONST
1089 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1090 unsigned int k, uintmax_t one)
1092 uintmax_t y = powm (b, q, n, ni, one);
1094 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1096 if (y == one || y == nm1)
1097 return true;
1099 for (unsigned int i = 1; i < k; i++)
1101 y = mulredc (y, y, n, ni);
1103 if (y == nm1)
1104 return true;
1105 if (y == one)
1106 return false;
1108 return false;
1111 static bool
1112 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1113 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1115 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1117 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1118 y1 = r1m;
1120 if (y0 == one[0] && y1 == one[1])
1121 return true;
1123 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1125 if (y0 == nm1_0 && y1 == nm1_1)
1126 return true;
1128 for (unsigned int i = 1; i < k; i++)
1130 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1131 y1 = r1m;
1133 if (y0 == nm1_0 && y1 == nm1_1)
1134 return true;
1135 if (y0 == one[0] && y1 == one[1])
1136 return false;
1138 return false;
1141 #if HAVE_GMP
1142 static bool
1143 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1144 mpz_srcptr q, unsigned long int k)
1146 mpz_powm (y, x, q, n);
1148 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1149 return true;
1151 for (unsigned long int i = 1; i < k; i++)
1153 mpz_powm_ui (y, y, 2, n);
1154 if (mpz_cmp (y, nm1) == 0)
1155 return true;
1156 if (mpz_cmp_ui (y, 1) == 0)
1157 return false;
1159 return false;
1161 #endif
1163 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1164 have been observed. The average seem to be about 2. */
1165 static bool
1166 prime_p (uintmax_t n)
1168 int k;
1169 bool is_prime;
1170 uintmax_t a_prim, one, ni;
1171 struct factors factors;
1173 if (n <= 1)
1174 return false;
1176 /* We have already casted out small primes. */
1177 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1178 return true;
1180 /* Precomputation for Miller-Rabin. */
1181 uintmax_t q = n - 1;
1182 for (k = 0; (q & 1) == 0; k++)
1183 q >>= 1;
1185 uintmax_t a = 2;
1186 binv (ni, n); /* ni <- 1/n mod B */
1187 redcify (one, 1, n);
1188 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1190 /* Perform a Miller-Rabin test, finds most composites quickly. */
1191 if (!millerrabin (n, ni, a_prim, q, k, one))
1192 return false;
1194 if (flag_prove_primality)
1196 /* Factor n-1 for Lucas. */
1197 factor (0, n - 1, &factors);
1200 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1201 number composite. */
1202 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1204 if (flag_prove_primality)
1206 is_prime = true;
1207 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1209 is_prime
1210 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1213 else
1215 /* After enough Miller-Rabin runs, be content. */
1216 is_prime = (r == MR_REPS - 1);
1219 if (is_prime)
1220 return true;
1222 a += primes_diff[r]; /* Establish new base. */
1224 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1225 on most processors, since it avoids udiv_qrnnd. If we go down the
1226 udiv_qrnnd_preinv path, this code should be replaced. */
1228 uintmax_t s1, s0;
1229 umul_ppmm (s1, s0, one, a);
1230 if (LIKELY (s1 == 0))
1231 a_prim = s0 % n;
1232 else
1234 uintmax_t dummy ATTRIBUTE_UNUSED;
1235 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1239 if (!millerrabin (n, ni, a_prim, q, k, one))
1240 return false;
1243 error (0, 0, _("Lucas prime test failure. This should not happen"));
1244 abort ();
1247 static bool
1248 prime2_p (uintmax_t n1, uintmax_t n0)
1250 uintmax_t q[2], nm1[2];
1251 uintmax_t a_prim[2];
1252 uintmax_t one[2];
1253 uintmax_t na[2];
1254 uintmax_t ni;
1255 unsigned int k;
1256 struct factors factors;
1258 if (n1 == 0)
1259 return prime_p (n0);
1261 nm1[1] = n1 - (n0 == 0);
1262 nm1[0] = n0 - 1;
1263 if (nm1[0] == 0)
1265 count_trailing_zeros (k, nm1[1]);
1267 q[0] = nm1[1] >> k;
1268 q[1] = 0;
1269 k += W_TYPE_SIZE;
1271 else
1273 count_trailing_zeros (k, nm1[0]);
1274 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1277 uintmax_t a = 2;
1278 binv (ni, n0);
1279 redcify2 (one[1], one[0], 1, n1, n0);
1280 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1282 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1283 na[0] = n0;
1284 na[1] = n1;
1286 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1287 return false;
1289 if (flag_prove_primality)
1291 /* Factor n-1 for Lucas. */
1292 factor (nm1[1], nm1[0], &factors);
1295 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1296 number composite. */
1297 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1299 bool is_prime;
1300 uintmax_t e[2], y[2];
1302 if (flag_prove_primality)
1304 is_prime = true;
1305 if (factors.plarge[1])
1307 uintmax_t pi;
1308 binv (pi, factors.plarge[0]);
1309 e[0] = pi * nm1[0];
1310 e[1] = 0;
1311 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1312 is_prime = (y[0] != one[0] || y[1] != one[1]);
1314 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1316 /* FIXME: We always have the factor 2. Do we really need to
1317 handle it here? We have done the same powering as part
1318 of millerrabin. */
1319 if (factors.p[i] == 2)
1320 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1321 else
1322 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1323 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1324 is_prime = (y[0] != one[0] || y[1] != one[1]);
1327 else
1329 /* After enough Miller-Rabin runs, be content. */
1330 is_prime = (r == MR_REPS - 1);
1333 if (is_prime)
1334 return true;
1336 a += primes_diff[r]; /* Establish new base. */
1337 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1339 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1340 return false;
1343 error (0, 0, _("Lucas prime test failure. This should not happen"));
1344 abort ();
1347 #if HAVE_GMP
1348 static bool
1349 mp_prime_p (mpz_t n)
1351 bool is_prime;
1352 mpz_t q, a, nm1, tmp;
1353 struct mp_factors factors;
1355 if (mpz_cmp_ui (n, 1) <= 0)
1356 return false;
1358 /* We have already casted out small primes. */
1359 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1360 return true;
1362 mpz_inits (q, a, nm1, tmp, NULL);
1364 /* Precomputation for Miller-Rabin. */
1365 mpz_sub_ui (nm1, n, 1);
1367 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1368 unsigned long int k = mpz_scan1 (nm1, 0);
1369 mpz_tdiv_q_2exp (q, nm1, k);
1371 mpz_set_ui (a, 2);
1373 /* Perform a Miller-Rabin test, finds most composites quickly. */
1374 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1376 is_prime = false;
1377 goto ret2;
1380 if (flag_prove_primality)
1382 /* Factor n-1 for Lucas. */
1383 mpz_set (tmp, nm1);
1384 mp_factor (tmp, &factors);
1387 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1388 number composite. */
1389 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1391 if (flag_prove_primality)
1393 is_prime = true;
1394 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1396 mpz_divexact (tmp, nm1, factors.p[i]);
1397 mpz_powm (tmp, a, tmp, n);
1398 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1401 else
1403 /* After enough Miller-Rabin runs, be content. */
1404 is_prime = (r == MR_REPS - 1);
1407 if (is_prime)
1408 goto ret1;
1410 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1412 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1414 is_prime = false;
1415 goto ret1;
1419 error (0, 0, _("Lucas prime test failure. This should not happen"));
1420 abort ();
1422 ret1:
1423 if (flag_prove_primality)
1424 mp_factor_clear (&factors);
1425 ret2:
1426 mpz_clears (q, a, nm1, tmp, NULL);
1428 return is_prime;
1430 #endif
1432 static void
1433 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1434 struct factors *factors)
1436 uintmax_t x, z, y, P, t, ni, g;
1438 unsigned long int k = 1;
1439 unsigned long int l = 1;
1441 redcify (P, 1, n);
1442 addmod (x, P, P, n); /* i.e., redcify(2) */
1443 y = z = x;
1445 while (n != 1)
1447 assert (a < n);
1449 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1451 for (;;)
1455 x = mulredc (x, x, n, ni);
1456 addmod (x, x, a, n);
1458 submod (t, z, x, n);
1459 P = mulredc (P, t, n, ni);
1461 if (k % 32 == 1)
1463 if (gcd_odd (P, n) != 1)
1464 goto factor_found;
1465 y = x;
1468 while (--k != 0);
1470 z = x;
1471 k = l;
1472 l = 2 * l;
1473 for (unsigned long int i = 0; i < k; i++)
1475 x = mulredc (x, x, n, ni);
1476 addmod (x, x, a, n);
1478 y = x;
1481 factor_found:
1484 y = mulredc (y, y, n, ni);
1485 addmod (y, y, a, n);
1487 submod (t, z, y, n);
1488 g = gcd_odd (t, n);
1490 while (g == 1);
1492 n = n / g;
1494 if (!prime_p (g))
1495 factor_using_pollard_rho (g, a + 1, factors);
1496 else
1497 factor_insert (factors, g);
1499 if (prime_p (n))
1501 factor_insert (factors, n);
1502 break;
1505 x = x % n;
1506 z = z % n;
1507 y = y % n;
1511 static void
1512 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1513 struct factors *factors)
1515 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1517 unsigned long int k = 1;
1518 unsigned long int l = 1;
1520 redcify2 (P1, P0, 1, n1, n0);
1521 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1522 y1 = z1 = x1;
1523 y0 = z0 = x0;
1525 while (n1 != 0 || n0 != 1)
1527 binv (ni, n0);
1529 for (;;)
1533 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1534 x1 = r1m;
1535 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1537 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1538 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1539 P1 = r1m;
1541 if (k % 32 == 1)
1543 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1544 if (g1 != 0 || g0 != 1)
1545 goto factor_found;
1546 y1 = x1; y0 = x0;
1549 while (--k != 0);
1551 z1 = x1; z0 = x0;
1552 k = l;
1553 l = 2 * l;
1554 for (unsigned long int i = 0; i < k; i++)
1556 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1557 x1 = r1m;
1558 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1560 y1 = x1; y0 = x0;
1563 factor_found:
1566 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1567 y1 = r1m;
1568 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1570 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1571 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1573 while (g1 == 0 && g0 == 1);
1575 if (g1 == 0)
1577 /* The found factor is one word. */
1578 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1580 if (!prime_p (g0))
1581 factor_using_pollard_rho (g0, a + 1, factors);
1582 else
1583 factor_insert (factors, g0);
1585 else
1587 /* The found factor is two words. This is highly unlikely, thus hard
1588 to trigger. Please be careful before you change this code! */
1589 uintmax_t ginv;
1591 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1592 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1593 n1 = 0; /* modulo B, ignoring the high divisor word. */
1595 if (!prime2_p (g1, g0))
1596 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1597 else
1598 factor_insert_large (factors, g1, g0);
1601 if (n1 == 0)
1603 if (prime_p (n0))
1605 factor_insert (factors, n0);
1606 break;
1609 factor_using_pollard_rho (n0, a, factors);
1610 return;
1613 if (prime2_p (n1, n0))
1615 factor_insert_large (factors, n1, n0);
1616 break;
1619 x0 = mod2 (&x1, x1, x0, n1, n0);
1620 z0 = mod2 (&z1, z1, z0, n1, n0);
1621 y0 = mod2 (&y1, y1, y0, n1, n0);
1625 #if HAVE_GMP
1626 static void
1627 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1628 struct mp_factors *factors)
1630 mpz_t x, z, y, P;
1631 mpz_t t, t2;
1633 debug ("[pollard-rho (%lu)] ", a);
1635 mpz_inits (t, t2, NULL);
1636 mpz_init_set_si (y, 2);
1637 mpz_init_set_si (x, 2);
1638 mpz_init_set_si (z, 2);
1639 mpz_init_set_ui (P, 1);
1641 unsigned long long int k = 1;
1642 unsigned long long int l = 1;
1644 while (mpz_cmp_ui (n, 1) != 0)
1646 for (;;)
1650 mpz_mul (t, x, x);
1651 mpz_mod (x, t, n);
1652 mpz_add_ui (x, x, a);
1654 mpz_sub (t, z, x);
1655 mpz_mul (t2, P, t);
1656 mpz_mod (P, t2, n);
1658 if (k % 32 == 1)
1660 mpz_gcd (t, P, n);
1661 if (mpz_cmp_ui (t, 1) != 0)
1662 goto factor_found;
1663 mpz_set (y, x);
1666 while (--k != 0);
1668 mpz_set (z, x);
1669 k = l;
1670 l = 2 * l;
1671 for (unsigned long long int i = 0; i < k; i++)
1673 mpz_mul (t, x, x);
1674 mpz_mod (x, t, n);
1675 mpz_add_ui (x, x, a);
1677 mpz_set (y, x);
1680 factor_found:
1683 mpz_mul (t, y, y);
1684 mpz_mod (y, t, n);
1685 mpz_add_ui (y, y, a);
1687 mpz_sub (t, z, y);
1688 mpz_gcd (t, t, n);
1690 while (mpz_cmp_ui (t, 1) == 0);
1692 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1694 if (!mp_prime_p (t))
1696 debug ("[composite factor--restarting pollard-rho] ");
1697 mp_factor_using_pollard_rho (t, a + 1, factors);
1699 else
1701 mp_factor_insert (factors, t);
1704 if (mp_prime_p (n))
1706 mp_factor_insert (factors, n);
1707 break;
1710 mpz_mod (x, x, n);
1711 mpz_mod (z, z, n);
1712 mpz_mod (y, y, n);
1715 mpz_clears (P, t2, t, z, x, y, NULL);
1717 #endif
1719 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1720 algorithm is replaced, consider also returning the remainder. */
1721 static uintmax_t _GL_ATTRIBUTE_CONST
1722 isqrt (uintmax_t n)
1724 uintmax_t x;
1725 unsigned c;
1726 if (n == 0)
1727 return 0;
1729 count_leading_zeros (c, n);
1731 /* Make x > sqrt(n). This will be invariant through the loop. */
1732 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1734 for (;;)
1736 uintmax_t y = (x + n/x) / 2;
1737 if (y >= x)
1738 return x;
1740 x = y;
1744 static uintmax_t _GL_ATTRIBUTE_CONST
1745 isqrt2 (uintmax_t nh, uintmax_t nl)
1747 unsigned int shift;
1748 uintmax_t x;
1750 /* Ensures the remainder fits in an uintmax_t. */
1751 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1753 if (nh == 0)
1754 return isqrt (nl);
1756 count_leading_zeros (shift, nh);
1757 shift &= ~1;
1759 /* Make x > sqrt(n) */
1760 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1761 x <<= (W_TYPE_SIZE - shift) / 2;
1763 /* Do we need more than one iteration? */
1764 for (;;)
1766 uintmax_t r ATTRIBUTE_UNUSED;
1767 uintmax_t q, y;
1768 udiv_qrnnd (q, r, nh, nl, x);
1769 y = (x + q) / 2;
1771 if (y >= x)
1773 uintmax_t hi, lo;
1774 umul_ppmm (hi, lo, x + 1, x + 1);
1775 assert (gt2 (hi, lo, nh, nl));
1777 umul_ppmm (hi, lo, x, x);
1778 assert (ge2 (nh, nl, hi, lo));
1779 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1780 assert (hi == 0);
1782 return x;
1785 x = y;
1789 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1790 #define MAGIC64 ((uint64_t) 0x0202021202030213ULL)
1791 #define MAGIC63 ((uint64_t) 0x0402483012450293ULL)
1792 #define MAGIC65 ((uint64_t) 0x218a019866014613ULL)
1793 #define MAGIC11 0x23b
1795 /* Returns the square root if the input is a square, otherwise 0. */
1796 static uintmax_t _GL_ATTRIBUTE_CONST
1797 is_square (uintmax_t x)
1799 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1800 computing the square root. */
1801 if (((MAGIC64 >> (x & 63)) & 1)
1802 && ((MAGIC63 >> (x % 63)) & 1)
1803 /* Both 0 and 64 are squares mod (65) */
1804 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1805 && ((MAGIC11 >> (x % 11) & 1)))
1807 uintmax_t r = isqrt (x);
1808 if (r*r == x)
1809 return r;
1811 return 0;
1814 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1815 static const unsigned short invtab[0x81] =
1817 0x200,
1818 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1819 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1820 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1821 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1822 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1823 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1824 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1825 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1826 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1827 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1828 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1829 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1830 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1831 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1832 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1833 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1836 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1837 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1838 #define div_smallq(q, r, u, d) \
1839 do { \
1840 if ((u) / 0x40 < (d)) \
1842 int _cnt; \
1843 uintmax_t _dinv, _mask, _q, _r; \
1844 count_leading_zeros (_cnt, (d)); \
1845 _r = (u); \
1846 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1848 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1849 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1851 else \
1853 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1854 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1856 _r -= _q*(d); \
1858 _mask = -(uintmax_t) (_r >= (d)); \
1859 (r) = _r - (_mask & (d)); \
1860 (q) = _q - _mask; \
1861 assert ( (q) * (d) + (r) == u); \
1863 else \
1865 uintmax_t _q = (u) / (d); \
1866 (r) = (u) - _q * (d); \
1867 (q) = _q; \
1869 } while (0)
1871 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1872 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1873 with 3025 = 55^2.
1875 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1876 representing G_0 = (-55, 2*4652, 8653).
1878 In the notation of the paper:
1880 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1884 t_0 = floor([q_0 + R_0] / S0) = 1
1885 R_1 = t_0 * S_0 - R_0 = 4001
1886 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1889 /* Multipliers, in order of efficiency:
1890 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1891 0.7317 3*5*7 = 105 = 1
1892 0.7820 3*5*11 = 165 = 1
1893 0.7872 3*5 = 15 = 3
1894 0.8101 3*7*11 = 231 = 3
1895 0.8155 3*7 = 21 = 1
1896 0.8284 5*7*11 = 385 = 1
1897 0.8339 5*7 = 35 = 3
1898 0.8716 3*11 = 33 = 1
1899 0.8774 3 = 3 = 3
1900 0.8913 5*11 = 55 = 3
1901 0.8972 5 = 5 = 1
1902 0.9233 7*11 = 77 = 1
1903 0.9295 7 = 7 = 3
1904 0.9934 11 = 11 = 3
1906 #define QUEUE_SIZE 50
1908 #if STAT_SQUFOF
1909 # define Q_FREQ_SIZE 50
1910 /* Element 0 keeps the total */
1911 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1912 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1913 #endif
1915 /* Return true on success. Expected to fail only for numbers
1916 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1917 static bool
1918 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1920 /* Uses algorithm and notation from
1922 SQUARE FORM FACTORIZATION
1923 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1925 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1928 static const unsigned int multipliers_1[] =
1929 { /* = 1 (mod 4) */
1930 105, 165, 21, 385, 33, 5, 77, 1, 0
1932 static const unsigned int multipliers_3[] =
1933 { /* = 3 (mod 4) */
1934 1155, 15, 231, 35, 3, 55, 7, 11, 0
1937 const unsigned int *m;
1939 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1941 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1942 return false;
1944 uintmax_t sqrt_n = isqrt2 (n1, n0);
1946 if (n0 == sqrt_n * sqrt_n)
1948 uintmax_t p1, p0;
1950 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1951 assert (p0 == n0);
1953 if (n1 == p1)
1955 if (prime_p (sqrt_n))
1956 factor_insert_multiplicity (factors, sqrt_n, 2);
1957 else
1959 struct factors f;
1961 f.nfactors = 0;
1962 if (!factor_using_squfof (0, sqrt_n, &f))
1964 /* Try pollard rho instead */
1965 factor_using_pollard_rho (sqrt_n, 1, &f);
1967 /* Duplicate the new factors */
1968 for (unsigned int i = 0; i < f.nfactors; i++)
1969 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
1971 return true;
1975 /* Select multipliers so we always get n * mu = 3 (mod 4) */
1976 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
1977 *m; m++)
1979 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
1980 unsigned int i;
1981 unsigned int mu = *m;
1982 unsigned int qpos = 0;
1984 assert (mu * n0 % 4 == 3);
1986 /* In the notation of the paper, with mu * n == 3 (mod 4), we
1987 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
1988 I understand it, the necessary bound is 4 \mu^3 < n, or 32
1989 mu^3 < n.
1991 However, this seems insufficient: With n = 37243139 and mu =
1992 105, we get a trivial factor, from the square 38809 = 197^2,
1993 without any corresponding Q earlier in the iteration.
1995 Requiring 64 mu^3 < n seems sufficient. */
1996 if (n1 == 0)
1998 if ((uintmax_t) mu*mu*mu >= n0 / 64)
1999 continue;
2001 else
2003 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2004 continue;
2006 umul_ppmm (Dh, Dl, n0, mu);
2007 Dh += n1 * mu;
2009 assert (Dl % 4 != 1);
2010 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2012 S = isqrt2 (Dh, Dl);
2014 Q1 = 1;
2015 P = S;
2017 /* Square root remainder fits in one word, so ignore high part. */
2018 Q = Dl - P*P;
2019 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2020 L = isqrt (2*S);
2021 B = 2*L;
2022 L1 = mu * 2 * L;
2024 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2025 4 D. */
2027 for (i = 0; i <= B; i++)
2029 uintmax_t q, P1, t, rem;
2031 div_smallq (q, rem, S+P, Q);
2032 P1 = S - rem; /* P1 = q*Q - P */
2034 #if STAT_SQUFOF
2035 assert (q > 0);
2036 q_freq[0]++;
2037 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2038 #endif
2040 if (Q <= L1)
2042 uintmax_t g = Q;
2044 if ( (Q & 1) == 0)
2045 g /= 2;
2047 g /= gcd_odd (g, mu);
2049 if (g <= L)
2051 if (qpos >= QUEUE_SIZE)
2052 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2053 queue[qpos].Q = g;
2054 queue[qpos].P = P % g;
2055 qpos++;
2059 /* I think the difference can be either sign, but mod
2060 2^W_TYPE_SIZE arithmetic should be fine. */
2061 t = Q1 + q * (P - P1);
2062 Q1 = Q;
2063 Q = t;
2064 P = P1;
2066 if ( (i & 1) == 0)
2068 uintmax_t r = is_square (Q);
2069 if (r)
2071 for (unsigned int j = 0; j < qpos; j++)
2073 if (queue[j].Q == r)
2075 if (r == 1)
2076 /* Traversed entire cycle. */
2077 goto next_multiplier;
2079 /* Need the absolute value for divisibility test. */
2080 if (P >= queue[j].P)
2081 t = P - queue[j].P;
2082 else
2083 t = queue[j].P - P;
2084 if (t % r == 0)
2086 /* Delete entries up to and including entry
2087 j, which matched. */
2088 memmove (queue, queue + j + 1,
2089 (qpos - j - 1) * sizeof (queue[0]));
2090 qpos -= (j + 1);
2092 goto next_i;
2096 /* We have found a square form, which should give a
2097 factor. */
2098 Q1 = r;
2099 assert (S >= P); /* What signs are possible? */
2100 P += r * ((S - P) / r);
2102 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2103 for the case D = 2N. */
2104 /* Compute Q = (D - P*P) / Q1, but we need double
2105 precision. */
2106 uintmax_t hi, lo;
2107 umul_ppmm (hi, lo, P, P);
2108 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2109 udiv_qrnnd (Q, rem, hi, lo, Q1);
2110 assert (rem == 0);
2112 for (;;)
2114 /* Note: There appears to by a typo in the paper,
2115 Step 4a in the algorithm description says q <--
2116 floor([S+P]/\hat Q), but looking at the equations
2117 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2118 (In this code, \hat Q is Q1). */
2119 div_smallq (q, rem, S+P, Q);
2120 P1 = S - rem; /* P1 = q*Q - P */
2122 #if STAT_SQUFOF
2123 q_freq[0]++;
2124 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2125 #endif
2126 if (P == P1)
2127 break;
2128 t = Q1 + q * (P - P1);
2129 Q1 = Q;
2130 Q = t;
2131 P = P1;
2134 if ( (Q & 1) == 0)
2135 Q /= 2;
2136 Q /= gcd_odd (Q, mu);
2138 assert (Q > 1 && (n1 || Q < n0));
2140 if (prime_p (Q))
2141 factor_insert (factors, Q);
2142 else if (!factor_using_squfof (0, Q, factors))
2143 factor_using_pollard_rho (Q, 2, factors);
2145 divexact_21 (n1, n0, n1, n0, Q);
2147 if (prime2_p (n1, n0))
2148 factor_insert_large (factors, n1, n0);
2149 else
2151 if (!factor_using_squfof (n1, n0, factors))
2153 if (n1 == 0)
2154 factor_using_pollard_rho (n0, 1, factors);
2155 else
2156 factor_using_pollard_rho2 (n1, n0, 1, factors);
2160 return true;
2163 next_i:;
2165 next_multiplier:;
2167 return false;
2170 static void
2171 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2173 factors->nfactors = 0;
2174 factors->plarge[1] = 0;
2176 if (t1 == 0 && t0 < 2)
2177 return;
2179 t0 = factor_using_division (&t1, t1, t0, factors);
2181 if (t1 == 0 && t0 < 2)
2182 return;
2184 if (prime2_p (t1, t0))
2185 factor_insert_large (factors, t1, t0);
2186 else
2188 if (alg == ALG_SQUFOF)
2189 if (factor_using_squfof (t1, t0, factors))
2190 return;
2192 if (t1 == 0)
2193 factor_using_pollard_rho (t0, 1, factors);
2194 else
2195 factor_using_pollard_rho2 (t1, t0, 1, factors);
2199 #if HAVE_GMP
2200 static void
2201 mp_factor (mpz_t t, struct mp_factors *factors)
2203 mp_factor_init (factors);
2205 if (mpz_sgn (t) != 0)
2207 mp_factor_using_division (t, factors);
2209 if (mpz_cmp_ui (t, 1) != 0)
2211 debug ("[is number prime?] ");
2212 if (mp_prime_p (t))
2213 mp_factor_insert (factors, t);
2214 else
2215 mp_factor_using_pollard_rho (t, 1, factors);
2219 #endif
2221 static strtol_error
2222 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2224 unsigned int lo_carry;
2225 uintmax_t hi = 0, lo = 0;
2227 strtol_error err = LONGINT_INVALID;
2229 /* Skip initial spaces and '+'. */
2230 for (;;)
2232 char c = *s;
2233 if (c == ' ')
2234 s++;
2235 else if (c == '+')
2237 s++;
2238 break;
2240 else
2241 break;
2244 /* Initial scan for invalid digits. */
2245 const char *p = s;
2246 for (;;)
2248 unsigned int c = *p++;
2249 if (c == 0)
2250 break;
2252 if (UNLIKELY (!ISDIGIT (c)))
2254 err = LONGINT_INVALID;
2255 break;
2258 err = LONGINT_OK; /* we've seen at least one valid digit */
2261 for (;err == LONGINT_OK;)
2263 unsigned int c = *s++;
2264 if (c == 0)
2265 break;
2267 c -= '0';
2269 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2271 err = LONGINT_OVERFLOW;
2272 break;
2274 hi = 10 * hi;
2276 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2277 lo_carry += 10 * lo < 2 * lo;
2279 lo = 10 * lo;
2280 lo += c;
2282 lo_carry += lo < c;
2283 hi += lo_carry;
2284 if (UNLIKELY (hi < lo_carry))
2286 err = LONGINT_OVERFLOW;
2287 break;
2291 *hip = hi;
2292 *lop = lo;
2294 return err;
2297 static void
2298 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2300 uintmax_t q, r;
2302 if (t1 == 0)
2303 printf ("%ju", t0);
2304 else
2306 /* Use very plain code here since it seems hard to write fast code
2307 without assuming a specific word size. */
2308 q = t1 / 1000000000;
2309 r = t1 % 1000000000;
2310 udiv_qrnnd (t0, r, r, t0, 1000000000);
2311 print_uintmaxes (q, t0);
2312 printf ("%09u", (int) r);
2316 /* Single-precision factoring */
2317 static void
2318 print_factors_single (uintmax_t t1, uintmax_t t0)
2320 struct factors factors;
2322 print_uintmaxes (t1, t0);
2323 putchar (':');
2325 factor (t1, t0, &factors);
2327 for (unsigned int j = 0; j < factors.nfactors; j++)
2328 for (unsigned int k = 0; k < factors.e[j]; k++)
2330 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2331 putchar (' ');
2332 fputs (umaxtostr (factors.p[j], buf), stdout);
2335 if (factors.plarge[1])
2337 putchar (' ');
2338 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2340 putchar ('\n');
2343 /* Emit the factors of the indicated number. If we have the option of using
2344 either algorithm, we select on the basis of the length of the number.
2345 For longer numbers, we prefer the MP algorithm even if the native algorithm
2346 has enough digits, because the algorithm is better. The turnover point
2347 depends on the value. */
2348 static bool
2349 print_factors (const char *input)
2351 uintmax_t t1, t0;
2353 /* Try converting the number to one or two words. If it fails, use GMP or
2354 print an error message. The 2nd condition checks that the most
2355 significant bit of the two-word number is clear, in a typesize neutral
2356 way. */
2357 strtol_error err = strto2uintmax (&t1, &t0, input);
2359 switch (err)
2361 case LONGINT_OK:
2362 if (((t1 << 1) >> 1) == t1)
2364 debug ("[%s]", _("using single-precision arithmetic"));
2365 print_factors_single (t1, t0);
2366 return true;
2368 break;
2370 case LONGINT_OVERFLOW:
2371 /* Try GMP. */
2372 break;
2374 default:
2375 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2376 return false;
2379 #if HAVE_GMP
2380 debug ("[%s]", _("using arbitrary-precision arithmetic"));
2381 mpz_t t;
2382 struct mp_factors factors;
2384 mpz_init_set_str (t, input, 10);
2386 gmp_printf ("%Zd:", t);
2387 mp_factor (t, &factors);
2389 for (unsigned int j = 0; j < factors.nfactors; j++)
2390 for (unsigned int k = 0; k < factors.e[j]; k++)
2391 gmp_printf (" %Zd", factors.p[j]);
2393 mp_factor_clear (&factors);
2394 mpz_clear (t);
2395 putchar ('\n');
2396 return true;
2397 #else
2398 error (0, 0, _("%s is too large"), quote (input));
2399 return false;
2400 #endif
2403 void
2404 usage (int status)
2406 if (status != EXIT_SUCCESS)
2407 emit_try_help ();
2408 else
2410 printf (_("\
2411 Usage: %s [NUMBER]...\n\
2412 or: %s OPTION\n\
2414 program_name, program_name);
2415 fputs (_("\
2416 Print the prime factors of each specified integer NUMBER. If none\n\
2417 are specified on the command line, read them from standard input.\n\
2419 "), stdout);
2420 fputs (HELP_OPTION_DESCRIPTION, stdout);
2421 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2422 emit_ancillary_info ();
2424 exit (status);
2427 static bool
2428 do_stdin (void)
2430 bool ok = true;
2431 token_buffer tokenbuffer;
2433 init_tokenbuffer (&tokenbuffer);
2435 while (true)
2437 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2438 &tokenbuffer);
2439 if (token_length == (size_t) -1)
2440 break;
2441 ok &= print_factors (tokenbuffer.buffer);
2443 free (tokenbuffer.buffer);
2445 return ok;
2449 main (int argc, char **argv)
2451 initialize_main (&argc, &argv);
2452 set_program_name (argv[0]);
2453 setlocale (LC_ALL, "");
2454 bindtextdomain (PACKAGE, LOCALEDIR);
2455 textdomain (PACKAGE);
2457 atexit (close_stdout);
2459 alg = ALG_POLLARD_RHO; /* Default to Pollard rho */
2461 int c;
2462 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2464 switch (c)
2466 case VERBOSE_OPTION:
2467 verbose = 1;
2468 break;
2470 case 's':
2471 alg = ALG_SQUFOF;
2472 break;
2474 case 'w':
2475 flag_prove_primality = false;
2476 break;
2478 case_GETOPT_HELP_CHAR;
2480 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2482 default:
2483 usage (EXIT_FAILURE);
2487 #if STAT_SQUFOF
2488 if (alg == ALG_SQUFOF)
2489 memset (q_freq, 0, sizeof (q_freq));
2490 #endif
2492 bool ok;
2493 if (argc <= optind)
2494 ok = do_stdin ();
2495 else
2497 ok = true;
2498 for (int i = optind; i < argc; i++)
2499 if (! print_factors (argv[i]))
2500 ok = false;
2503 #if STAT_SQUFOF
2504 if (alg == ALG_SQUFOF && q_freq[0] > 0)
2506 double acc_f;
2507 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2508 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2510 double f = (double) q_freq[i] / q_freq[0];
2511 acc_f += f;
2512 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2513 100.0 * f, 100.0 * acc_f);
2516 #endif
2518 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);