dd: output final progress before syncing
[coreutils.git] / src / factor.c
blob66ca3878bed0873935db33350efe0f48842935fd
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2022 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Algorithm:
40 (1) Perform trial division using a small primes table, but without hardware
41 division since the primes table store inverses modulo the word base.
42 (The GMP variant of this code doesn't make use of the precomputed
43 inverses, but instead relies on GMP for fast divisibility testing.)
44 (2) Check the nature of any non-factored part using Miller-Rabin for
45 detecting composites, and Lucas for detecting primes.
46 (3) Factor any remaining composite part using the Pollard-Brent rho
47 algorithm or if USE_SQUFOF is defined to 1, try that first.
48 Status of found factors are checked again using Miller-Rabin and Lucas.
50 We prefer using Hensel norm in the divisions, not the more familiar
51 Euclidian norm, since the former leads to much faster code. In the
52 Pollard-Brent rho code and the prime testing code, we use Montgomery's
53 trick of multiplying all n-residues by the word base, allowing cheap Hensel
54 reductions mod n.
56 The GMP code uses an algorithm that can be considerably slower;
57 for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58 2^{127}-3 takes about 50 ms with the two-word algorithm but would
59 take about 750 ms with the GMP code.
61 Improvements:
63 * Use modular inverses also for exact division in the Lucas code, and
64 elsewhere. A problem is to locate the inverses not from an index, but
65 from a prime. We might instead compute the inverse on-the-fly.
67 * Tune trial division table size (not forgetting that this is a standalone
68 program where the table will be read from secondary storage for
69 each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #include <gmp.h>
108 #include <assert.h>
110 #include "system.h"
111 #include "die.h"
112 #include "error.h"
113 #include "full-write.h"
114 #include "quote.h"
115 #include "readtokens.h"
116 #include "xstrtol.h"
118 /* The official name of this program (e.g., no 'g' prefix). */
119 #define PROGRAM_NAME "factor"
121 #define AUTHORS \
122 proper_name ("Paul Rubin"), \
123 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
124 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
126 /* Token delimiters when reading from a file. */
127 #define DELIM "\n\t "
129 #ifndef USE_LONGLONG_H
130 /* With the way we use longlong.h, it's only safe to use
131 when UWtype = UHWtype, as there were various cases
132 (as can be seen in the history for longlong.h) where
133 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
134 to avoid compile time or run time issues. */
135 # if LONG_MAX == INTMAX_MAX
136 # define USE_LONGLONG_H 1
137 # endif
138 #endif
140 #if USE_LONGLONG_H
142 /* Make definitions for longlong.h to make it do what it can do for us */
144 /* bitcount for uintmax_t */
145 # if UINTMAX_MAX == UINT32_MAX
146 # define W_TYPE_SIZE 32
147 # elif UINTMAX_MAX == UINT64_MAX
148 # define W_TYPE_SIZE 64
149 # elif UINTMAX_MAX == UINT128_MAX
150 # define W_TYPE_SIZE 128
151 # endif
153 # define UWtype uintmax_t
154 # define UHWtype unsigned long int
155 # undef UDWtype
156 # if HAVE_ATTRIBUTE_MODE
157 typedef unsigned int UQItype __attribute__ ((mode (QI)));
158 typedef int SItype __attribute__ ((mode (SI)));
159 typedef unsigned int USItype __attribute__ ((mode (SI)));
160 typedef int DItype __attribute__ ((mode (DI)));
161 typedef unsigned int UDItype __attribute__ ((mode (DI)));
162 # else
163 typedef unsigned char UQItype;
164 typedef long SItype;
165 typedef unsigned long int USItype;
166 # if HAVE_LONG_LONG_INT
167 typedef long long int DItype;
168 typedef unsigned long long int UDItype;
169 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
170 typedef long int DItype;
171 typedef unsigned long int UDItype;
172 # endif
173 # endif
174 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
175 # define ASSERT(x) /* FIXME make longlong.h really standalone */
176 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
177 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
178 # ifndef __GMP_GNUC_PREREQ
179 # define __GMP_GNUC_PREREQ(a,b) 1
180 # endif
182 /* These stub macros are only used in longlong.h in certain system compiler
183 combinations, so ensure usage to avoid -Wunused-macros warnings. */
184 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
185 ASSERT (1)
186 __GMP_DECLSPEC
187 # endif
189 # if _ARCH_PPC
190 # define HAVE_HOST_CPU_FAMILY_powerpc 1
191 # endif
192 # include "longlong.h"
193 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
194 const unsigned char factor_clz_tab[129] =
196 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
197 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
198 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
199 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
202 # endif
204 #else /* not USE_LONGLONG_H */
206 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
207 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
208 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
209 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
211 #endif
213 #if !defined __clz_tab && !defined UHWtype
214 /* Without this seemingly useless conditional, gcc -Wunused-macros
215 warns that each of the two tested macros is unused on Fedora 18.
216 FIXME: this is just an ugly band-aid. Fix it properly. */
217 #endif
219 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
220 #define MAX_NFACTS 26
222 enum
224 DEV_DEBUG_OPTION = CHAR_MAX + 1
227 static struct option const long_options[] =
229 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
230 {GETOPT_HELP_OPTION_DECL},
231 {GETOPT_VERSION_OPTION_DECL},
232 {NULL, 0, NULL, 0}
235 struct factors
237 uintmax_t plarge[2]; /* Can have a single large factor */
238 uintmax_t p[MAX_NFACTS];
239 unsigned char e[MAX_NFACTS];
240 unsigned char nfactors;
243 struct mp_factors
245 mpz_t *p;
246 unsigned long int *e;
247 unsigned long int nfactors;
250 static void factor (uintmax_t, uintmax_t, struct factors *);
252 #ifndef umul_ppmm
253 # define umul_ppmm(w1, w0, u, v) \
254 do { \
255 uintmax_t __x0, __x1, __x2, __x3; \
256 unsigned long int __ul, __vl, __uh, __vh; \
257 uintmax_t __u = (u), __v = (v); \
259 __ul = __ll_lowpart (__u); \
260 __uh = __ll_highpart (__u); \
261 __vl = __ll_lowpart (__v); \
262 __vh = __ll_highpart (__v); \
264 __x0 = (uintmax_t) __ul * __vl; \
265 __x1 = (uintmax_t) __ul * __vh; \
266 __x2 = (uintmax_t) __uh * __vl; \
267 __x3 = (uintmax_t) __uh * __vh; \
269 __x1 += __ll_highpart (__x0);/* This can't give carry. */ \
270 __x1 += __x2; /* But this indeed can. */ \
271 if (__x1 < __x2) /* Did we get it? */ \
272 __x3 += __ll_B; /* Yes, add it in the proper pos. */ \
274 (w1) = __x3 + __ll_highpart (__x1); \
275 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
276 } while (0)
277 #endif
279 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
280 /* Define our own, not needing normalization. This function is
281 currently not performance critical, so keep it simple. Similar to
282 the mod macro below. */
283 # undef udiv_qrnnd
284 # define udiv_qrnnd(q, r, n1, n0, d) \
285 do { \
286 uintmax_t __d1, __d0, __q, __r1, __r0; \
288 assert ((n1) < (d)); \
289 __d1 = (d); __d0 = 0; \
290 __r1 = (n1); __r0 = (n0); \
291 __q = 0; \
292 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
294 rsh2 (__d1, __d0, __d1, __d0, 1); \
295 __q <<= 1; \
296 if (ge2 (__r1, __r0, __d1, __d0)) \
298 __q++; \
299 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
302 (r) = __r0; \
303 (q) = __q; \
304 } while (0)
305 #endif
307 #if !defined add_ssaaaa
308 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
309 do { \
310 uintmax_t _add_x; \
311 _add_x = (al) + (bl); \
312 (sh) = (ah) + (bh) + (_add_x < (al)); \
313 (sl) = _add_x; \
314 } while (0)
315 #endif
317 #define rsh2(rh, rl, ah, al, cnt) \
318 do { \
319 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
320 (rh) = (ah) >> (cnt); \
321 } while (0)
323 #define lsh2(rh, rl, ah, al, cnt) \
324 do { \
325 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
326 (rl) = (al) << (cnt); \
327 } while (0)
329 #define ge2(ah, al, bh, bl) \
330 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
332 #define gt2(ah, al, bh, bl) \
333 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
335 #ifndef sub_ddmmss
336 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
337 do { \
338 uintmax_t _cy; \
339 _cy = (al) < (bl); \
340 (rl) = (al) - (bl); \
341 (rh) = (ah) - (bh) - _cy; \
342 } while (0)
343 #endif
345 #ifndef count_leading_zeros
346 # define count_leading_zeros(count, x) do { \
347 uintmax_t __clz_x = (x); \
348 unsigned int __clz_c; \
349 for (__clz_c = 0; \
350 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
351 __clz_c += 8) \
352 __clz_x <<= 8; \
353 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
354 __clz_x <<= 1; \
355 (count) = __clz_c; \
356 } while (0)
357 #endif
359 #ifndef count_trailing_zeros
360 # define count_trailing_zeros(count, x) do { \
361 uintmax_t __ctz_x = (x); \
362 unsigned int __ctz_c = 0; \
363 while ((__ctz_x & 1) == 0) \
365 __ctz_x >>= 1; \
366 __ctz_c++; \
368 (count) = __ctz_c; \
369 } while (0)
370 #endif
372 /* Requires that a < n and b <= n */
373 #define submod(r,a,b,n) \
374 do { \
375 uintmax_t _t = - (uintmax_t) (a < b); \
376 (r) = ((n) & _t) + (a) - (b); \
377 } while (0)
379 #define addmod(r,a,b,n) \
380 submod ((r), (a), ((n) - (b)), (n))
382 /* Modular two-word addition and subtraction. For performance reasons, the
383 most significant bit of n1 must be clear. The destination variables must be
384 distinct from the mod operand. */
385 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
386 do { \
387 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
388 if (ge2 ((r1), (r0), (n1), (n0))) \
389 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
390 } while (0)
391 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
392 do { \
393 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
394 if ((intmax_t) (r1) < 0) \
395 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
396 } while (0)
398 #define HIGHBIT_TO_MASK(x) \
399 (((intmax_t)-1 >> 1) < 0 \
400 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
401 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
402 ? UINTMAX_MAX : (uintmax_t) 0))
404 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
405 Requires that d1 != 0. */
406 static uintmax_t
407 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
409 int cntd, cnta;
411 assert (d1 != 0);
413 if (a1 == 0)
415 *r1 = 0;
416 return a0;
419 count_leading_zeros (cntd, d1);
420 count_leading_zeros (cnta, a1);
421 int cnt = cntd - cnta;
422 lsh2 (d1, d0, d1, d0, cnt);
423 for (int i = 0; i < cnt; i++)
425 if (ge2 (a1, a0, d1, d0))
426 sub_ddmmss (a1, a0, a1, a0, d1, d0);
427 rsh2 (d1, d0, d1, d0, 1);
430 *r1 = a1;
431 return a0;
434 ATTRIBUTE_CONST
435 static uintmax_t
436 gcd_odd (uintmax_t a, uintmax_t b)
438 if ((b & 1) == 0)
440 uintmax_t t = b;
441 b = a;
442 a = t;
444 if (a == 0)
445 return b;
447 /* Take out least significant one bit, to make room for sign */
448 b >>= 1;
450 for (;;)
452 uintmax_t t;
453 uintmax_t bgta;
455 while ((a & 1) == 0)
456 a >>= 1;
457 a >>= 1;
459 t = a - b;
460 if (t == 0)
461 return (a << 1) + 1;
463 bgta = HIGHBIT_TO_MASK (t);
465 /* b <-- min (a, b) */
466 b += (bgta & t);
468 /* a <-- |a - b| */
469 a = (t ^ bgta) - bgta;
473 static uintmax_t
474 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
476 assert (b0 & 1);
478 if ((a0 | a1) == 0)
480 *r1 = b1;
481 return b0;
484 while ((a0 & 1) == 0)
485 rsh2 (a1, a0, a1, a0, 1);
487 for (;;)
489 if ((b1 | a1) == 0)
491 *r1 = 0;
492 return gcd_odd (b0, a0);
495 if (gt2 (a1, a0, b1, b0))
497 sub_ddmmss (a1, a0, a1, a0, b1, b0);
499 rsh2 (a1, a0, a1, a0, 1);
500 while ((a0 & 1) == 0);
502 else if (gt2 (b1, b0, a1, a0))
504 sub_ddmmss (b1, b0, b1, b0, a1, a0);
506 rsh2 (b1, b0, b1, b0, 1);
507 while ((b0 & 1) == 0);
509 else
510 break;
513 *r1 = a1;
514 return a0;
517 static void
518 factor_insert_multiplicity (struct factors *factors,
519 uintmax_t prime, unsigned int m)
521 unsigned int nfactors = factors->nfactors;
522 uintmax_t *p = factors->p;
523 unsigned char *e = factors->e;
525 /* Locate position for insert new or increment e. */
526 int i;
527 for (i = nfactors - 1; i >= 0; i--)
529 if (p[i] <= prime)
530 break;
533 if (i < 0 || p[i] != prime)
535 for (int j = nfactors - 1; j > i; j--)
537 p[j + 1] = p[j];
538 e[j + 1] = e[j];
540 p[i + 1] = prime;
541 e[i + 1] = m;
542 factors->nfactors = nfactors + 1;
544 else
546 e[i] += m;
550 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
552 static void
553 factor_insert_large (struct factors *factors,
554 uintmax_t p1, uintmax_t p0)
556 if (p1 > 0)
558 assert (factors->plarge[1] == 0);
559 factors->plarge[0] = p0;
560 factors->plarge[1] = p1;
562 else
563 factor_insert (factors, p0);
566 #ifndef mpz_inits
568 # include <stdarg.h>
570 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
571 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
573 static void
574 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
576 va_list ap;
578 va_start (ap, mpz_single_init);
580 mpz_t *mpz;
581 while ((mpz = va_arg (ap, mpz_t *)))
582 mpz_single_init (*mpz);
584 va_end (ap);
586 #endif
588 static void mp_factor (mpz_t, struct mp_factors *);
590 static void
591 mp_factor_init (struct mp_factors *factors)
593 factors->p = NULL;
594 factors->e = NULL;
595 factors->nfactors = 0;
598 static void
599 mp_factor_clear (struct mp_factors *factors)
601 for (unsigned int i = 0; i < factors->nfactors; i++)
602 mpz_clear (factors->p[i]);
604 free (factors->p);
605 free (factors->e);
608 static void
609 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
611 unsigned long int nfactors = factors->nfactors;
612 mpz_t *p = factors->p;
613 unsigned long int *e = factors->e;
614 long i;
616 /* Locate position for insert new or increment e. */
617 for (i = nfactors - 1; i >= 0; i--)
619 if (mpz_cmp (p[i], prime) <= 0)
620 break;
623 if (i < 0 || mpz_cmp (p[i], prime) != 0)
625 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
626 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
628 mpz_init (p[nfactors]);
629 for (long j = nfactors - 1; j > i; j--)
631 mpz_set (p[j + 1], p[j]);
632 e[j + 1] = e[j];
634 mpz_set (p[i + 1], prime);
635 e[i + 1] = 1;
637 factors->p = p;
638 factors->e = e;
639 factors->nfactors = nfactors + 1;
641 else
643 e[i] += 1;
647 static void
648 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
650 mpz_t pz;
652 mpz_init_set_ui (pz, prime);
653 mp_factor_insert (factors, pz);
654 mpz_clear (pz);
658 /* Number of bits in an uintmax_t. */
659 enum { W = sizeof (uintmax_t) * CHAR_BIT };
661 /* Verify that uintmax_t does not have holes in its representation. */
662 verify (UINTMAX_MAX >> (W - 1) == 1);
664 #define P(a,b,c,d) a,
665 static const unsigned char primes_diff[] = {
666 #include "primes.h"
667 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
669 #undef P
671 #define PRIMES_PTAB_ENTRIES \
672 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
674 #define P(a,b,c,d) b,
675 static const unsigned char primes_diff8[] = {
676 #include "primes.h"
677 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
679 #undef P
681 struct primes_dtab
683 uintmax_t binv, lim;
686 #define P(a,b,c,d) {c,d},
687 static const struct primes_dtab primes_dtab[] = {
688 #include "primes.h"
689 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
691 #undef P
693 /* Verify that uintmax_t is not wider than
694 the integers used to generate primes.h. */
695 verify (W <= WIDE_UINT_BITS);
697 /* debugging for developers. Enables devmsg().
698 This flag is used only in the GMP code. */
699 static bool dev_debug = false;
701 /* Prove primality or run probabilistic tests. */
702 static bool flag_prove_primality = PROVE_PRIMALITY;
704 /* Number of Miller-Rabin tests to run when not proving primality. */
705 #define MR_REPS 25
707 static void
708 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
709 unsigned int off)
711 for (unsigned int j = 0; j < off; j++)
712 p += primes_diff[i + j];
713 factor_insert (factors, p);
716 /* Trial division with odd primes uses the following trick.
718 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
719 consider the case t < B (this is the second loop below).
721 From our tables we get
723 binv = p^{-1} (mod B)
724 lim = floor ((B-1) / p).
726 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
727 (and all quotients in this range occur for some t).
729 Then t = q * p is true also (mod B), and p is invertible we get
731 q = t * binv (mod B).
733 Next, assume that t is *not* divisible by p. Since multiplication
734 by binv (mod B) is a one-to-one mapping,
736 t * binv (mod B) > lim,
738 because all the smaller values are already taken.
740 This can be summed up by saying that the function
742 q(t) = binv * t (mod B)
744 is a permutation of the range 0 <= t < B, with the curious property
745 that it maps the multiples of p onto the range 0 <= q <= lim, in
746 order, and the non-multiples of p onto the range lim < q < B.
749 static uintmax_t
750 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
751 struct factors *factors)
753 if (t0 % 2 == 0)
755 unsigned int cnt;
757 if (t0 == 0)
759 count_trailing_zeros (cnt, t1);
760 t0 = t1 >> cnt;
761 t1 = 0;
762 cnt += W_TYPE_SIZE;
764 else
766 count_trailing_zeros (cnt, t0);
767 rsh2 (t1, t0, t1, t0, cnt);
770 factor_insert_multiplicity (factors, 2, cnt);
773 uintmax_t p = 3;
774 unsigned int i;
775 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
777 for (;;)
779 uintmax_t q1, q0, hi;
780 MAYBE_UNUSED uintmax_t lo;
782 q0 = t0 * primes_dtab[i].binv;
783 umul_ppmm (hi, lo, q0, p);
784 if (hi > t1)
785 break;
786 hi = t1 - hi;
787 q1 = hi * primes_dtab[i].binv;
788 if (LIKELY (q1 > primes_dtab[i].lim))
789 break;
790 t1 = q1; t0 = q0;
791 factor_insert (factors, p);
793 p += primes_diff[i + 1];
795 if (t1p)
796 *t1p = t1;
798 #define DIVBLOCK(I) \
799 do { \
800 for (;;) \
802 q = t0 * pd[I].binv; \
803 if (LIKELY (q > pd[I].lim)) \
804 break; \
805 t0 = q; \
806 factor_insert_refind (factors, p, i + 1, I); \
808 } while (0)
810 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
812 uintmax_t q;
813 const struct primes_dtab *pd = &primes_dtab[i];
814 DIVBLOCK (0);
815 DIVBLOCK (1);
816 DIVBLOCK (2);
817 DIVBLOCK (3);
818 DIVBLOCK (4);
819 DIVBLOCK (5);
820 DIVBLOCK (6);
821 DIVBLOCK (7);
823 p += primes_diff8[i];
824 if (p * p > t0)
825 break;
828 return t0;
831 static void
832 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
834 mpz_t q;
835 unsigned long int p;
837 devmsg ("[trial division] ");
839 mpz_init (q);
841 p = mpz_scan1 (t, 0);
842 mpz_fdiv_q_2exp (t, t, p);
843 while (p)
845 mp_factor_insert_ui (factors, 2);
846 --p;
849 p = 3;
850 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
852 if (! mpz_divisible_ui_p (t, p))
854 p += primes_diff[i++];
855 if (mpz_cmp_ui (t, p * p) < 0)
856 break;
858 else
860 mpz_tdiv_q_ui (t, t, p);
861 mp_factor_insert_ui (factors, p);
865 mpz_clear (q);
868 /* Entry i contains (2i+1)^(-1) mod 2^8. */
869 static const unsigned char binvert_table[128] =
871 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
872 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
873 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
874 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
875 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
876 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
877 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
878 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
879 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
880 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
881 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
882 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
883 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
884 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
885 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
886 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
889 /* Compute n^(-1) mod B, using a Newton iteration. */
890 #define binv(inv,n) \
891 do { \
892 uintmax_t __n = (n); \
893 uintmax_t __inv; \
895 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
896 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
897 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
898 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
900 if (W_TYPE_SIZE > 64) \
902 int __invbits = 64; \
903 do { \
904 __inv = 2 * __inv - __inv * __inv * __n; \
905 __invbits *= 2; \
906 } while (__invbits < W_TYPE_SIZE); \
909 (inv) = __inv; \
910 } while (0)
912 /* q = u / d, assuming d|u. */
913 #define divexact_21(q1, q0, u1, u0, d) \
914 do { \
915 uintmax_t _di, _q0; \
916 binv (_di, (d)); \
917 _q0 = (u0) * _di; \
918 if ((u1) >= (d)) \
920 uintmax_t _p1; \
921 MAYBE_UNUSED intmax_t _p0; \
922 umul_ppmm (_p1, _p0, _q0, d); \
923 (q1) = ((u1) - _p1) * _di; \
924 (q0) = _q0; \
926 else \
928 (q0) = _q0; \
929 (q1) = 0; \
931 } while (0)
933 /* x B (mod n). */
934 #define redcify(r_prim, r, n) \
935 do { \
936 MAYBE_UNUSED uintmax_t _redcify_q; \
937 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
938 } while (0)
940 /* x B^2 (mod n). Requires x > 0, n1 < B/2. */
941 #define redcify2(r1, r0, x, n1, n0) \
942 do { \
943 uintmax_t _r1, _r0, _i; \
944 if ((x) < (n1)) \
946 _r1 = (x); _r0 = 0; \
947 _i = W_TYPE_SIZE; \
949 else \
951 _r1 = 0; _r0 = (x); \
952 _i = 2 * W_TYPE_SIZE; \
954 while (_i-- > 0) \
956 lsh2 (_r1, _r0, _r1, _r0, 1); \
957 if (ge2 (_r1, _r0, (n1), (n0))) \
958 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
960 (r1) = _r1; \
961 (r0) = _r0; \
962 } while (0)
964 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
965 Both a and b must be in redc form, the result will be in redc form too. */
966 static inline uintmax_t
967 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
969 uintmax_t rh, rl, q, th, xh;
970 MAYBE_UNUSED uintmax_t tl;
972 umul_ppmm (rh, rl, a, b);
973 q = rl * mi;
974 umul_ppmm (th, tl, q, m);
975 xh = rh - th;
976 if (rh < th)
977 xh += m;
979 return xh;
982 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
983 Both a and b must be in redc form, the result will be in redc form too.
984 For performance reasons, the most significant bit of m must be clear. */
985 static uintmax_t
986 mulredc2 (uintmax_t *r1p,
987 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
988 uintmax_t m1, uintmax_t m0, uintmax_t mi)
990 uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
991 MAYBE_UNUSED uintmax_t p0;
992 mi = -mi;
993 assert ((a1 >> (W_TYPE_SIZE - 1)) == 0);
994 assert ((b1 >> (W_TYPE_SIZE - 1)) == 0);
995 assert ((m1 >> (W_TYPE_SIZE - 1)) == 0);
997 /* First compute a0 * <b1, b0> B^{-1}
998 +-----+
999 |a0 b0|
1000 +--+--+--+
1001 |a0 b1|
1002 +--+--+--+
1003 |q0 m0|
1004 +--+--+--+
1005 |q0 m1|
1006 -+--+--+--+
1007 |r1|r0| 0|
1008 +--+--+--+
1010 umul_ppmm (t1, t0, a0, b0);
1011 umul_ppmm (r1, r0, a0, b1);
1012 q = mi * t0;
1013 umul_ppmm (p1, p0, q, m0);
1014 umul_ppmm (s1, s0, q, m1);
1015 r0 += (t0 != 0); /* Carry */
1016 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1017 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1018 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1020 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1021 +-----+
1022 |a1 b0|
1023 +--+--+
1024 |r1|r0|
1025 +--+--+--+
1026 |a1 b1|
1027 +--+--+--+
1028 |q1 m0|
1029 +--+--+--+
1030 |q1 m1|
1031 -+--+--+--+
1032 |r1|r0| 0|
1033 +--+--+--+
1035 umul_ppmm (t1, t0, a1, b0);
1036 umul_ppmm (s1, s0, a1, b1);
1037 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1038 q = mi * t0;
1039 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1040 umul_ppmm (p1, p0, q, m0);
1041 umul_ppmm (s1, s0, q, m1);
1042 r0 += (t0 != 0); /* Carry */
1043 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1044 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1045 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1047 if (ge2 (r1, r0, m1, m0))
1048 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1050 *r1p = r1;
1051 return r0;
1054 ATTRIBUTE_CONST
1055 static uintmax_t
1056 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1058 uintmax_t y = one;
1060 if (e & 1)
1061 y = b;
1063 while (e != 0)
1065 b = mulredc (b, b, n, ni);
1066 e >>= 1;
1068 if (e & 1)
1069 y = mulredc (y, b, n, ni);
1072 return y;
1075 static uintmax_t
1076 powm2 (uintmax_t *r1m,
1077 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1078 uintmax_t ni, const uintmax_t *one)
1080 uintmax_t r1, r0, b1, b0, n1, n0;
1081 unsigned int i;
1082 uintmax_t e;
1084 b0 = bp[0];
1085 b1 = bp[1];
1086 n0 = np[0];
1087 n1 = np[1];
1089 r0 = one[0];
1090 r1 = one[1];
1092 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1094 if (e & 1)
1096 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1097 r1 = *r1m;
1099 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1100 b1 = *r1m;
1102 for (e = ep[1]; e > 0; e >>= 1)
1104 if (e & 1)
1106 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1107 r1 = *r1m;
1109 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1110 b1 = *r1m;
1112 *r1m = r1;
1113 return r0;
1116 ATTRIBUTE_CONST
1117 static bool
1118 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1119 unsigned int k, uintmax_t one)
1121 uintmax_t y = powm (b, q, n, ni, one);
1123 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1125 if (y == one || y == nm1)
1126 return true;
1128 for (unsigned int i = 1; i < k; i++)
1130 y = mulredc (y, y, n, ni);
1132 if (y == nm1)
1133 return true;
1134 if (y == one)
1135 return false;
1137 return false;
1140 ATTRIBUTE_PURE static bool
1141 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1142 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1144 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1146 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1147 y1 = r1m;
1149 if (y0 == one[0] && y1 == one[1])
1150 return true;
1152 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1154 if (y0 == nm1_0 && y1 == nm1_1)
1155 return true;
1157 for (unsigned int i = 1; i < k; i++)
1159 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1160 y1 = r1m;
1162 if (y0 == nm1_0 && y1 == nm1_1)
1163 return true;
1164 if (y0 == one[0] && y1 == one[1])
1165 return false;
1167 return false;
1170 static bool
1171 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1172 mpz_srcptr q, unsigned long int k)
1174 mpz_powm (y, x, q, n);
1176 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1177 return true;
1179 for (unsigned long int i = 1; i < k; i++)
1181 mpz_powm_ui (y, y, 2, n);
1182 if (mpz_cmp (y, nm1) == 0)
1183 return true;
1184 if (mpz_cmp_ui (y, 1) == 0)
1185 return false;
1187 return false;
1190 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1191 have been observed. The average seem to be about 2. */
1192 static bool
1193 prime_p (uintmax_t n)
1195 int k;
1196 bool is_prime;
1197 uintmax_t a_prim, one, ni;
1198 struct factors factors;
1200 if (n <= 1)
1201 return false;
1203 /* We have already casted out small primes. */
1204 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1205 return true;
1207 /* Precomputation for Miller-Rabin. */
1208 uintmax_t q = n - 1;
1209 for (k = 0; (q & 1) == 0; k++)
1210 q >>= 1;
1212 uintmax_t a = 2;
1213 binv (ni, n); /* ni <- 1/n mod B */
1214 redcify (one, 1, n);
1215 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1217 /* Perform a Miller-Rabin test, finds most composites quickly. */
1218 if (!millerrabin (n, ni, a_prim, q, k, one))
1219 return false;
1221 if (flag_prove_primality)
1223 /* Factor n-1 for Lucas. */
1224 factor (0, n - 1, &factors);
1227 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1228 number composite. */
1229 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1231 if (flag_prove_primality)
1233 is_prime = true;
1234 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1236 is_prime
1237 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1240 else
1242 /* After enough Miller-Rabin runs, be content. */
1243 is_prime = (r == MR_REPS - 1);
1246 if (is_prime)
1247 return true;
1249 a += primes_diff[r]; /* Establish new base. */
1251 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1252 on most processors, since it avoids udiv_qrnnd. If we go down the
1253 udiv_qrnnd_preinv path, this code should be replaced. */
1255 uintmax_t s1, s0;
1256 umul_ppmm (s1, s0, one, a);
1257 if (LIKELY (s1 == 0))
1258 a_prim = s0 % n;
1259 else
1261 MAYBE_UNUSED uintmax_t dummy;
1262 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1266 if (!millerrabin (n, ni, a_prim, q, k, one))
1267 return false;
1270 error (0, 0, _("Lucas prime test failure. This should not happen"));
1271 abort ();
1274 static bool
1275 prime2_p (uintmax_t n1, uintmax_t n0)
1277 uintmax_t q[2], nm1[2];
1278 uintmax_t a_prim[2];
1279 uintmax_t one[2];
1280 uintmax_t na[2];
1281 uintmax_t ni;
1282 unsigned int k;
1283 struct factors factors;
1285 if (n1 == 0)
1286 return prime_p (n0);
1288 nm1[1] = n1 - (n0 == 0);
1289 nm1[0] = n0 - 1;
1290 if (nm1[0] == 0)
1292 count_trailing_zeros (k, nm1[1]);
1294 q[0] = nm1[1] >> k;
1295 q[1] = 0;
1296 k += W_TYPE_SIZE;
1298 else
1300 count_trailing_zeros (k, nm1[0]);
1301 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1304 uintmax_t a = 2;
1305 binv (ni, n0);
1306 redcify2 (one[1], one[0], 1, n1, n0);
1307 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1309 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1310 na[0] = n0;
1311 na[1] = n1;
1313 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1314 return false;
1316 if (flag_prove_primality)
1318 /* Factor n-1 for Lucas. */
1319 factor (nm1[1], nm1[0], &factors);
1322 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1323 number composite. */
1324 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1326 bool is_prime;
1327 uintmax_t e[2], y[2];
1329 if (flag_prove_primality)
1331 is_prime = true;
1332 if (factors.plarge[1])
1334 uintmax_t pi;
1335 binv (pi, factors.plarge[0]);
1336 e[0] = pi * nm1[0];
1337 e[1] = 0;
1338 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1339 is_prime = (y[0] != one[0] || y[1] != one[1]);
1341 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1343 /* FIXME: We always have the factor 2. Do we really need to
1344 handle it here? We have done the same powering as part
1345 of millerrabin. */
1346 if (factors.p[i] == 2)
1347 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1348 else
1349 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1350 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1351 is_prime = (y[0] != one[0] || y[1] != one[1]);
1354 else
1356 /* After enough Miller-Rabin runs, be content. */
1357 is_prime = (r == MR_REPS - 1);
1360 if (is_prime)
1361 return true;
1363 a += primes_diff[r]; /* Establish new base. */
1364 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1366 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1367 return false;
1370 error (0, 0, _("Lucas prime test failure. This should not happen"));
1371 abort ();
1374 static bool
1375 mp_prime_p (mpz_t n)
1377 bool is_prime;
1378 mpz_t q, a, nm1, tmp;
1379 struct mp_factors factors;
1381 if (mpz_cmp_ui (n, 1) <= 0)
1382 return false;
1384 /* We have already casted out small primes. */
1385 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1386 return true;
1388 mpz_inits (q, a, nm1, tmp, NULL);
1390 /* Precomputation for Miller-Rabin. */
1391 mpz_sub_ui (nm1, n, 1);
1393 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1394 unsigned long int k = mpz_scan1 (nm1, 0);
1395 mpz_tdiv_q_2exp (q, nm1, k);
1397 mpz_set_ui (a, 2);
1399 /* Perform a Miller-Rabin test, finds most composites quickly. */
1400 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1402 is_prime = false;
1403 goto ret2;
1406 if (flag_prove_primality)
1408 /* Factor n-1 for Lucas. */
1409 mpz_set (tmp, nm1);
1410 mp_factor (tmp, &factors);
1413 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1414 number composite. */
1415 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1417 if (flag_prove_primality)
1419 is_prime = true;
1420 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1422 mpz_divexact (tmp, nm1, factors.p[i]);
1423 mpz_powm (tmp, a, tmp, n);
1424 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1427 else
1429 /* After enough Miller-Rabin runs, be content. */
1430 is_prime = (r == MR_REPS - 1);
1433 if (is_prime)
1434 goto ret1;
1436 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1438 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1440 is_prime = false;
1441 goto ret1;
1445 error (0, 0, _("Lucas prime test failure. This should not happen"));
1446 abort ();
1448 ret1:
1449 if (flag_prove_primality)
1450 mp_factor_clear (&factors);
1451 ret2:
1452 mpz_clears (q, a, nm1, tmp, NULL);
1454 return is_prime;
1457 static void
1458 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1459 struct factors *factors)
1461 uintmax_t x, z, y, P, t, ni, g;
1463 unsigned long int k = 1;
1464 unsigned long int l = 1;
1466 redcify (P, 1, n);
1467 addmod (x, P, P, n); /* i.e., redcify(2) */
1468 y = z = x;
1470 while (n != 1)
1472 assert (a < n);
1474 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1476 for (;;)
1480 x = mulredc (x, x, n, ni);
1481 addmod (x, x, a, n);
1483 submod (t, z, x, n);
1484 P = mulredc (P, t, n, ni);
1486 if (k % 32 == 1)
1488 if (gcd_odd (P, n) != 1)
1489 goto factor_found;
1490 y = x;
1493 while (--k != 0);
1495 z = x;
1496 k = l;
1497 l = 2 * l;
1498 for (unsigned long int i = 0; i < k; i++)
1500 x = mulredc (x, x, n, ni);
1501 addmod (x, x, a, n);
1503 y = x;
1506 factor_found:
1509 y = mulredc (y, y, n, ni);
1510 addmod (y, y, a, n);
1512 submod (t, z, y, n);
1513 g = gcd_odd (t, n);
1515 while (g == 1);
1517 if (n == g)
1519 /* Found n itself as factor. Restart with different params. */
1520 factor_using_pollard_rho (n, a + 1, factors);
1521 return;
1524 n = n / g;
1526 if (!prime_p (g))
1527 factor_using_pollard_rho (g, a + 1, factors);
1528 else
1529 factor_insert (factors, g);
1531 if (prime_p (n))
1533 factor_insert (factors, n);
1534 break;
1537 x = x % n;
1538 z = z % n;
1539 y = y % n;
1543 static void
1544 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1545 struct factors *factors)
1547 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1549 unsigned long int k = 1;
1550 unsigned long int l = 1;
1552 redcify2 (P1, P0, 1, n1, n0);
1553 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1554 y1 = z1 = x1;
1555 y0 = z0 = x0;
1557 while (n1 != 0 || n0 != 1)
1559 binv (ni, n0);
1561 for (;;)
1565 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1566 x1 = r1m;
1567 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1569 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1570 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1571 P1 = r1m;
1573 if (k % 32 == 1)
1575 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1576 if (g1 != 0 || g0 != 1)
1577 goto factor_found;
1578 y1 = x1; y0 = x0;
1581 while (--k != 0);
1583 z1 = x1; z0 = x0;
1584 k = l;
1585 l = 2 * l;
1586 for (unsigned long int i = 0; i < k; i++)
1588 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1589 x1 = r1m;
1590 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1592 y1 = x1; y0 = x0;
1595 factor_found:
1598 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1599 y1 = r1m;
1600 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1602 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1603 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1605 while (g1 == 0 && g0 == 1);
1607 if (g1 == 0)
1609 /* The found factor is one word, and > 1. */
1610 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1612 if (!prime_p (g0))
1613 factor_using_pollard_rho (g0, a + 1, factors);
1614 else
1615 factor_insert (factors, g0);
1617 else
1619 /* The found factor is two words. This is highly unlikely, thus hard
1620 to trigger. Please be careful before you change this code! */
1621 uintmax_t ginv;
1623 if (n1 == g1 && n0 == g0)
1625 /* Found n itself as factor. Restart with different params. */
1626 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1627 return;
1630 /* Compute n = n / g. Since the result will fit one word,
1631 we can compute the quotient modulo B, ignoring the high
1632 divisor word. */
1633 binv (ginv, g0);
1634 n0 = ginv * n0;
1635 n1 = 0;
1637 if (!prime2_p (g1, g0))
1638 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1639 else
1640 factor_insert_large (factors, g1, g0);
1643 if (n1 == 0)
1645 if (prime_p (n0))
1647 factor_insert (factors, n0);
1648 break;
1651 factor_using_pollard_rho (n0, a, factors);
1652 return;
1655 if (prime2_p (n1, n0))
1657 factor_insert_large (factors, n1, n0);
1658 break;
1661 x0 = mod2 (&x1, x1, x0, n1, n0);
1662 z0 = mod2 (&z1, z1, z0, n1, n0);
1663 y0 = mod2 (&y1, y1, y0, n1, n0);
1667 static void
1668 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1669 struct mp_factors *factors)
1671 mpz_t x, z, y, P;
1672 mpz_t t, t2;
1674 devmsg ("[pollard-rho (%lu)] ", a);
1676 mpz_inits (t, t2, NULL);
1677 mpz_init_set_si (y, 2);
1678 mpz_init_set_si (x, 2);
1679 mpz_init_set_si (z, 2);
1680 mpz_init_set_ui (P, 1);
1682 unsigned long long int k = 1;
1683 unsigned long long int l = 1;
1685 while (mpz_cmp_ui (n, 1) != 0)
1687 for (;;)
1691 mpz_mul (t, x, x);
1692 mpz_mod (x, t, n);
1693 mpz_add_ui (x, x, a);
1695 mpz_sub (t, z, x);
1696 mpz_mul (t2, P, t);
1697 mpz_mod (P, t2, n);
1699 if (k % 32 == 1)
1701 mpz_gcd (t, P, n);
1702 if (mpz_cmp_ui (t, 1) != 0)
1703 goto factor_found;
1704 mpz_set (y, x);
1707 while (--k != 0);
1709 mpz_set (z, x);
1710 k = l;
1711 l = 2 * l;
1712 for (unsigned long long int i = 0; i < k; i++)
1714 mpz_mul (t, x, x);
1715 mpz_mod (x, t, n);
1716 mpz_add_ui (x, x, a);
1718 mpz_set (y, x);
1721 factor_found:
1724 mpz_mul (t, y, y);
1725 mpz_mod (y, t, n);
1726 mpz_add_ui (y, y, a);
1728 mpz_sub (t, z, y);
1729 mpz_gcd (t, t, n);
1731 while (mpz_cmp_ui (t, 1) == 0);
1733 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1735 if (!mp_prime_p (t))
1737 devmsg ("[composite factor--restarting pollard-rho] ");
1738 mp_factor_using_pollard_rho (t, a + 1, factors);
1740 else
1742 mp_factor_insert (factors, t);
1745 if (mp_prime_p (n))
1747 mp_factor_insert (factors, n);
1748 break;
1751 mpz_mod (x, x, n);
1752 mpz_mod (z, z, n);
1753 mpz_mod (y, y, n);
1756 mpz_clears (P, t2, t, z, x, y, NULL);
1759 #if USE_SQUFOF
1760 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1761 algorithm is replaced, consider also returning the remainder. */
1762 ATTRIBUTE_CONST
1763 static uintmax_t
1764 isqrt (uintmax_t n)
1766 uintmax_t x;
1767 unsigned c;
1768 if (n == 0)
1769 return 0;
1771 count_leading_zeros (c, n);
1773 /* Make x > sqrt(n). This will be invariant through the loop. */
1774 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1776 for (;;)
1778 uintmax_t y = (x + n / x) / 2;
1779 if (y >= x)
1780 return x;
1782 x = y;
1786 ATTRIBUTE_CONST
1787 static uintmax_t
1788 isqrt2 (uintmax_t nh, uintmax_t nl)
1790 unsigned int shift;
1791 uintmax_t x;
1793 /* Ensures the remainder fits in an uintmax_t. */
1794 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1796 if (nh == 0)
1797 return isqrt (nl);
1799 count_leading_zeros (shift, nh);
1800 shift &= ~1;
1802 /* Make x > sqrt (n). */
1803 x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1804 x <<= (W_TYPE_SIZE - shift) / 2;
1806 /* Do we need more than one iteration? */
1807 for (;;)
1809 MAYBE_UNUSED uintmax_t r;
1810 uintmax_t q, y;
1811 udiv_qrnnd (q, r, nh, nl, x);
1812 y = (x + q) / 2;
1814 if (y >= x)
1816 uintmax_t hi, lo;
1817 umul_ppmm (hi, lo, x + 1, x + 1);
1818 assert (gt2 (hi, lo, nh, nl));
1820 umul_ppmm (hi, lo, x, x);
1821 assert (ge2 (nh, nl, hi, lo));
1822 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1823 assert (hi == 0);
1825 return x;
1828 x = y;
1832 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1833 # define MAGIC64 0x0202021202030213ULL
1834 # define MAGIC63 0x0402483012450293ULL
1835 # define MAGIC65 0x218a019866014613ULL
1836 # define MAGIC11 0x23b
1838 /* Return the square root if the input is a square, otherwise 0. */
1839 ATTRIBUTE_CONST
1840 static uintmax_t
1841 is_square (uintmax_t x)
1843 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1844 computing the square root. */
1845 if (((MAGIC64 >> (x & 63)) & 1)
1846 && ((MAGIC63 >> (x % 63)) & 1)
1847 /* Both 0 and 64 are squares mod (65). */
1848 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1849 && ((MAGIC11 >> (x % 11) & 1)))
1851 uintmax_t r = isqrt (x);
1852 if (r * r == x)
1853 return r;
1855 return 0;
1858 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1859 static const unsigned short invtab[0x81] =
1861 0x200,
1862 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1863 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1864 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1865 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1866 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1867 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1868 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1869 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1870 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1871 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1872 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1873 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1874 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1875 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1876 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1877 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1880 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1881 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1882 # define div_smallq(q, r, u, d) \
1883 do { \
1884 if ((u) / 0x40 < (d)) \
1886 int _cnt; \
1887 uintmax_t _dinv, _mask, _q, _r; \
1888 count_leading_zeros (_cnt, (d)); \
1889 _r = (u); \
1890 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1892 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1893 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1895 else \
1897 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1898 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1900 _r -= _q * (d); \
1902 _mask = -(uintmax_t) (_r >= (d)); \
1903 (r) = _r - (_mask & (d)); \
1904 (q) = _q - _mask; \
1905 assert ((q) * (d) + (r) == u); \
1907 else \
1909 uintmax_t _q = (u) / (d); \
1910 (r) = (u) - _q * (d); \
1911 (q) = _q; \
1913 } while (0)
1915 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1916 = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1917 with 3025 = 55^2.
1919 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1920 representing G_0 = (-55, 2 * 4652, 8653).
1922 In the notation of the paper:
1924 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1928 t_0 = floor([q_0 + R_0] / S0) = 1
1929 R_1 = t_0 * S_0 - R_0 = 4001
1930 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1933 /* Multipliers, in order of efficiency:
1934 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1935 0.7317 3*5*7 = 105 = 1
1936 0.7820 3*5*11 = 165 = 1
1937 0.7872 3*5 = 15 = 3
1938 0.8101 3*7*11 = 231 = 3
1939 0.8155 3*7 = 21 = 1
1940 0.8284 5*7*11 = 385 = 1
1941 0.8339 5*7 = 35 = 3
1942 0.8716 3*11 = 33 = 1
1943 0.8774 3 = 3 = 3
1944 0.8913 5*11 = 55 = 3
1945 0.8972 5 = 5 = 1
1946 0.9233 7*11 = 77 = 1
1947 0.9295 7 = 7 = 3
1948 0.9934 11 = 11 = 3
1950 # define QUEUE_SIZE 50
1951 #endif
1953 #if STAT_SQUFOF
1954 # define Q_FREQ_SIZE 50
1955 /* Element 0 keeps the total */
1956 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1957 #endif
1959 #if USE_SQUFOF
1960 /* Return true on success. Expected to fail only for numbers
1961 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1962 static bool
1963 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1965 /* Uses algorithm and notation from
1967 SQUARE FORM FACTORIZATION
1968 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1970 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1973 static const unsigned int multipliers_1[] =
1974 { /* = 1 (mod 4) */
1975 105, 165, 21, 385, 33, 5, 77, 1, 0
1977 static const unsigned int multipliers_3[] =
1978 { /* = 3 (mod 4) */
1979 1155, 15, 231, 35, 3, 55, 7, 11, 0
1982 const unsigned int *m;
1984 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1986 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1987 return false;
1989 uintmax_t sqrt_n = isqrt2 (n1, n0);
1991 if (n0 == sqrt_n * sqrt_n)
1993 uintmax_t p1, p0;
1995 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1996 assert (p0 == n0);
1998 if (n1 == p1)
2000 if (prime_p (sqrt_n))
2001 factor_insert_multiplicity (factors, sqrt_n, 2);
2002 else
2004 struct factors f;
2006 f.nfactors = 0;
2007 if (!factor_using_squfof (0, sqrt_n, &f))
2009 /* Try pollard rho instead */
2010 factor_using_pollard_rho (sqrt_n, 1, &f);
2012 /* Duplicate the new factors */
2013 for (unsigned int i = 0; i < f.nfactors; i++)
2014 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
2016 return true;
2020 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2021 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2022 *m; m++)
2024 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2025 unsigned int i;
2026 unsigned int mu = *m;
2027 unsigned int qpos = 0;
2029 assert (mu * n0 % 4 == 3);
2031 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2032 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2033 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2034 mu^3 < n.
2036 However, this seems insufficient: With n = 37243139 and mu =
2037 105, we get a trivial factor, from the square 38809 = 197^2,
2038 without any corresponding Q earlier in the iteration.
2040 Requiring 64 mu^3 < n seems sufficient. */
2041 if (n1 == 0)
2043 if ((uintmax_t) mu * mu * mu >= n0 / 64)
2044 continue;
2046 else
2048 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2049 continue;
2051 umul_ppmm (Dh, Dl, n0, mu);
2052 Dh += n1 * mu;
2054 assert (Dl % 4 != 1);
2055 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2057 S = isqrt2 (Dh, Dl);
2059 Q1 = 1;
2060 P = S;
2062 /* Square root remainder fits in one word, so ignore high part. */
2063 Q = Dl - P * P;
2064 /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
2065 L = isqrt (2 * S);
2066 B = 2 * L;
2067 L1 = mu * 2 * L;
2069 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2070 4 D. */
2072 for (i = 0; i <= B; i++)
2074 uintmax_t q, P1, t, rem;
2076 div_smallq (q, rem, S + P, Q);
2077 P1 = S - rem; /* P1 = q*Q - P */
2079 IF_LINT (assert (q > 0 && Q > 0));
2081 # if STAT_SQUFOF
2082 q_freq[0]++;
2083 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2084 # endif
2086 if (Q <= L1)
2088 uintmax_t g = Q;
2090 if ((Q & 1) == 0)
2091 g /= 2;
2093 g /= gcd_odd (g, mu);
2095 if (g <= L)
2097 if (qpos >= QUEUE_SIZE)
2098 die (EXIT_FAILURE, 0, _("squfof queue overflow"));
2099 queue[qpos].Q = g;
2100 queue[qpos].P = P % g;
2101 qpos++;
2105 /* I think the difference can be either sign, but mod
2106 2^W_TYPE_SIZE arithmetic should be fine. */
2107 t = Q1 + q * (P - P1);
2108 Q1 = Q;
2109 Q = t;
2110 P = P1;
2112 if ((i & 1) == 0)
2114 uintmax_t r = is_square (Q);
2115 if (r)
2117 for (unsigned int j = 0; j < qpos; j++)
2119 if (queue[j].Q == r)
2121 if (r == 1)
2122 /* Traversed entire cycle. */
2123 goto next_multiplier;
2125 /* Need the absolute value for divisibility test. */
2126 if (P >= queue[j].P)
2127 t = P - queue[j].P;
2128 else
2129 t = queue[j].P - P;
2130 if (t % r == 0)
2132 /* Delete entries up to and including entry
2133 j, which matched. */
2134 memmove (queue, queue + j + 1,
2135 (qpos - j - 1) * sizeof (queue[0]));
2136 qpos -= (j + 1);
2138 goto next_i;
2142 /* We have found a square form, which should give a
2143 factor. */
2144 Q1 = r;
2145 assert (S >= P); /* What signs are possible? */
2146 P += r * ((S - P) / r);
2148 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2149 for the case D = 2N. */
2150 /* Compute Q = (D - P*P) / Q1, but we need double
2151 precision. */
2152 uintmax_t hi, lo;
2153 umul_ppmm (hi, lo, P, P);
2154 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2155 udiv_qrnnd (Q, rem, hi, lo, Q1);
2156 assert (rem == 0);
2158 for (;;)
2160 /* Note: There appears to by a typo in the paper,
2161 Step 4a in the algorithm description says q <--
2162 floor([S+P]/\hat Q), but looking at the equations
2163 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2164 (In this code, \hat Q is Q1). */
2165 div_smallq (q, rem, S + P, Q);
2166 P1 = S - rem; /* P1 = q*Q - P */
2168 # if STAT_SQUFOF
2169 q_freq[0]++;
2170 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2171 # endif
2172 if (P == P1)
2173 break;
2174 t = Q1 + q * (P - P1);
2175 Q1 = Q;
2176 Q = t;
2177 P = P1;
2180 if ((Q & 1) == 0)
2181 Q /= 2;
2182 Q /= gcd_odd (Q, mu);
2184 assert (Q > 1 && (n1 || Q < n0));
2186 if (prime_p (Q))
2187 factor_insert (factors, Q);
2188 else if (!factor_using_squfof (0, Q, factors))
2189 factor_using_pollard_rho (Q, 2, factors);
2191 divexact_21 (n1, n0, n1, n0, Q);
2193 if (prime2_p (n1, n0))
2194 factor_insert_large (factors, n1, n0);
2195 else
2197 if (!factor_using_squfof (n1, n0, factors))
2199 if (n1 == 0)
2200 factor_using_pollard_rho (n0, 1, factors);
2201 else
2202 factor_using_pollard_rho2 (n1, n0, 1, factors);
2206 return true;
2209 next_i:;
2211 next_multiplier:;
2213 return false;
2215 #endif
2217 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2218 results in FACTORS. */
2219 static void
2220 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2222 factors->nfactors = 0;
2223 factors->plarge[1] = 0;
2225 if (t1 == 0 && t0 < 2)
2226 return;
2228 t0 = factor_using_division (&t1, t1, t0, factors);
2230 if (t1 == 0 && t0 < 2)
2231 return;
2233 if (prime2_p (t1, t0))
2234 factor_insert_large (factors, t1, t0);
2235 else
2237 #if USE_SQUFOF
2238 if (factor_using_squfof (t1, t0, factors))
2239 return;
2240 #endif
2242 if (t1 == 0)
2243 factor_using_pollard_rho (t0, 1, factors);
2244 else
2245 factor_using_pollard_rho2 (t1, t0, 1, factors);
2249 /* Use Pollard-rho to compute the prime factors of
2250 arbitrary-precision T, and put the results in FACTORS. */
2251 static void
2252 mp_factor (mpz_t t, struct mp_factors *factors)
2254 mp_factor_init (factors);
2256 if (mpz_sgn (t) != 0)
2258 mp_factor_using_division (t, factors);
2260 if (mpz_cmp_ui (t, 1) != 0)
2262 devmsg ("[is number prime?] ");
2263 if (mp_prime_p (t))
2264 mp_factor_insert (factors, t);
2265 else
2266 mp_factor_using_pollard_rho (t, 1, factors);
2271 static strtol_error
2272 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2274 unsigned int lo_carry;
2275 uintmax_t hi = 0, lo = 0;
2277 strtol_error err = LONGINT_INVALID;
2279 /* Initial scan for invalid digits. */
2280 char const *p = s;
2281 for (;;)
2283 unsigned int c = *p++;
2284 if (c == 0)
2285 break;
2287 if (UNLIKELY (!ISDIGIT (c)))
2289 err = LONGINT_INVALID;
2290 break;
2293 err = LONGINT_OK; /* we've seen at least one valid digit */
2296 while (err == LONGINT_OK)
2298 unsigned int c = *s++;
2299 if (c == 0)
2300 break;
2302 c -= '0';
2304 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2306 err = LONGINT_OVERFLOW;
2307 break;
2309 hi = 10 * hi;
2311 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2312 lo_carry += 10 * lo < 2 * lo;
2314 lo = 10 * lo;
2315 lo += c;
2317 lo_carry += lo < c;
2318 hi += lo_carry;
2319 if (UNLIKELY (hi < lo_carry))
2321 err = LONGINT_OVERFLOW;
2322 break;
2326 *hip = hi;
2327 *lop = lo;
2329 return err;
2332 /* Structure and routines for buffering and outputting full lines,
2333 to support parallel operation efficiently. */
2334 static struct lbuf_
2336 char *buf;
2337 char *end;
2338 } lbuf;
2340 /* 512 is chosen to give good performance,
2341 and also is the max guaranteed size that
2342 consumers can read atomically through pipes.
2343 Also it's big enough to cater for max line length
2344 even with 128 bit uintmax_t. */
2345 #define FACTOR_PIPE_BUF 512
2347 static void
2348 lbuf_alloc (void)
2350 if (lbuf.buf)
2351 return;
2353 /* Double to ensure enough space for
2354 previous numbers + next number. */
2355 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2356 lbuf.end = lbuf.buf;
2359 /* Write complete LBUF to standard output. */
2360 static void
2361 lbuf_flush (void)
2363 size_t size = lbuf.end - lbuf.buf;
2364 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2365 die (EXIT_FAILURE, errno, "%s", _("write error"));
2366 lbuf.end = lbuf.buf;
2369 /* Add a character C to LBUF and if it's a newline
2370 and enough bytes are already buffered,
2371 then write atomically to standard output. */
2372 static void
2373 lbuf_putc (char c)
2375 *lbuf.end++ = c;
2377 if (c == '\n')
2379 size_t buffered = lbuf.end - lbuf.buf;
2381 /* Provide immediate output for interactive use. */
2382 static int line_buffered = -1;
2383 if (line_buffered == -1)
2384 line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2385 if (line_buffered)
2386 lbuf_flush ();
2387 else if (buffered >= FACTOR_PIPE_BUF)
2389 /* Write output in <= PIPE_BUF chunks
2390 so consumers can read atomically. */
2391 char const *tend = lbuf.end;
2393 /* Since a umaxint_t's factors must fit in 512
2394 we're guaranteed to find a newline here. */
2395 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2396 while (*--tlend != '\n');
2397 tlend++;
2399 lbuf.end = tlend;
2400 lbuf_flush ();
2402 /* Buffer the remainder. */
2403 memcpy (lbuf.buf, tlend, tend - tlend);
2404 lbuf.end = lbuf.buf + (tend - tlend);
2409 /* Buffer an int to the internal LBUF. */
2410 static void
2411 lbuf_putint (uintmax_t i, size_t min_width)
2413 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2414 char const *umaxstr = umaxtostr (i, buf);
2415 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2416 size_t z = width;
2418 for (; z < min_width; z++)
2419 *lbuf.end++ = '0';
2421 memcpy (lbuf.end, umaxstr, width);
2422 lbuf.end += width;
2425 static void
2426 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2428 uintmax_t q, r;
2430 if (t1 == 0)
2431 lbuf_putint (t0, 0);
2432 else
2434 /* Use very plain code here since it seems hard to write fast code
2435 without assuming a specific word size. */
2436 q = t1 / 1000000000;
2437 r = t1 % 1000000000;
2438 udiv_qrnnd (t0, r, r, t0, 1000000000);
2439 print_uintmaxes (q, t0);
2440 lbuf_putint (r, 9);
2444 /* Single-precision factoring */
2445 static void
2446 print_factors_single (uintmax_t t1, uintmax_t t0)
2448 struct factors factors;
2450 print_uintmaxes (t1, t0);
2451 lbuf_putc (':');
2453 factor (t1, t0, &factors);
2455 for (unsigned int j = 0; j < factors.nfactors; j++)
2456 for (unsigned int k = 0; k < factors.e[j]; k++)
2458 lbuf_putc (' ');
2459 print_uintmaxes (0, factors.p[j]);
2462 if (factors.plarge[1])
2464 lbuf_putc (' ');
2465 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2468 lbuf_putc ('\n');
2471 /* Emit the factors of the indicated number. If we have the option of using
2472 either algorithm, we select on the basis of the length of the number.
2473 For longer numbers, we prefer the MP algorithm even if the native algorithm
2474 has enough digits, because the algorithm is better. The turnover point
2475 depends on the value. */
2476 static bool
2477 print_factors (char const *input)
2479 /* Skip initial spaces and '+'. */
2480 char const *str = input;
2481 while (*str == ' ')
2482 str++;
2483 str += *str == '+';
2485 uintmax_t t1, t0;
2487 /* Try converting the number to one or two words. If it fails, use GMP or
2488 print an error message. The 2nd condition checks that the most
2489 significant bit of the two-word number is clear, in a typesize neutral
2490 way. */
2491 strtol_error err = strto2uintmax (&t1, &t0, str);
2493 switch (err)
2495 case LONGINT_OK:
2496 if (((t1 << 1) >> 1) == t1)
2498 devmsg ("[using single-precision arithmetic] ");
2499 print_factors_single (t1, t0);
2500 return true;
2502 break;
2504 case LONGINT_OVERFLOW:
2505 /* Try GMP. */
2506 break;
2508 default:
2509 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2510 return false;
2513 devmsg ("[using arbitrary-precision arithmetic] ");
2514 mpz_t t;
2515 struct mp_factors factors;
2517 mpz_init_set_str (t, str, 10);
2519 mpz_out_str (stdout, 10, t);
2520 putchar (':');
2521 mp_factor (t, &factors);
2523 for (unsigned int j = 0; j < factors.nfactors; j++)
2524 for (unsigned int k = 0; k < factors.e[j]; k++)
2526 putchar (' ');
2527 mpz_out_str (stdout, 10, factors.p[j]);
2530 mp_factor_clear (&factors);
2531 mpz_clear (t);
2532 putchar ('\n');
2533 fflush (stdout);
2534 return true;
2537 void
2538 usage (int status)
2540 if (status != EXIT_SUCCESS)
2541 emit_try_help ();
2542 else
2544 printf (_("\
2545 Usage: %s [NUMBER]...\n\
2546 or: %s OPTION\n\
2548 program_name, program_name);
2549 fputs (_("\
2550 Print the prime factors of each specified integer NUMBER. If none\n\
2551 are specified on the command line, read them from standard input.\n\
2553 "), stdout);
2554 fputs (HELP_OPTION_DESCRIPTION, stdout);
2555 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2556 emit_ancillary_info (PROGRAM_NAME);
2558 exit (status);
2561 static bool
2562 do_stdin (void)
2564 bool ok = true;
2565 token_buffer tokenbuffer;
2567 init_tokenbuffer (&tokenbuffer);
2569 while (true)
2571 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2572 &tokenbuffer);
2573 if (token_length == (size_t) -1)
2574 break;
2575 ok &= print_factors (tokenbuffer.buffer);
2577 free (tokenbuffer.buffer);
2579 return ok;
2583 main (int argc, char **argv)
2585 initialize_main (&argc, &argv);
2586 set_program_name (argv[0]);
2587 setlocale (LC_ALL, "");
2588 bindtextdomain (PACKAGE, LOCALEDIR);
2589 textdomain (PACKAGE);
2591 lbuf_alloc ();
2592 atexit (close_stdout);
2593 atexit (lbuf_flush);
2595 int c;
2596 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2598 switch (c)
2600 case DEV_DEBUG_OPTION:
2601 dev_debug = true;
2602 break;
2604 case_GETOPT_HELP_CHAR;
2606 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2608 default:
2609 usage (EXIT_FAILURE);
2613 #if STAT_SQUFOF
2614 memset (q_freq, 0, sizeof (q_freq));
2615 #endif
2617 bool ok;
2618 if (argc <= optind)
2619 ok = do_stdin ();
2620 else
2622 ok = true;
2623 for (int i = optind; i < argc; i++)
2624 if (! print_factors (argv[i]))
2625 ok = false;
2628 #if STAT_SQUFOF
2629 if (q_freq[0] > 0)
2631 double acc_f;
2632 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2633 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2635 double f = (double) q_freq[i] / q_freq[0];
2636 acc_f += f;
2637 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2638 100.0 * f, 100.0 * acc_f);
2641 #endif
2643 return ok ? EXIT_SUCCESS : EXIT_FAILURE;