uptime: be more generous about read_utmp failure
[coreutils.git] / src / factor.c
blob9963a5df2435daeb393c6abdc7755e48c50a9c88
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Algorithm:
40 (1) Perform trial division using a small primes table, but without hardware
41 division since the primes table store inverses modulo the word base.
42 (The GMP variant of this code doesn't make use of the precomputed
43 inverses, but instead relies on GMP for fast divisibility testing.)
44 (2) Check the nature of any non-factored part using Miller-Rabin for
45 detecting composites, and Lucas for detecting primes.
46 (3) Factor any remaining composite part using the Pollard-Brent rho
47 algorithm or if USE_SQUFOF is defined to 1, try that first.
48 Status of found factors are checked again using Miller-Rabin and Lucas.
50 We prefer using Hensel norm in the divisions, not the more familiar
51 Euclidian norm, since the former leads to much faster code. In the
52 Pollard-Brent rho code and the prime testing code, we use Montgomery's
53 trick of multiplying all n-residues by the word base, allowing cheap Hensel
54 reductions mod n.
56 The GMP code uses an algorithm that can be considerably slower;
57 for example, on a circa-2017 Intel Xeon Silver 4116, factoring
58 2^{127}-3 takes about 50 ms with the two-word algorithm but would
59 take about 750 ms with the GMP code.
61 Improvements:
63 * Use modular inverses also for exact division in the Lucas code, and
64 elsewhere. A problem is to locate the inverses not from an index, but
65 from a prime. We might instead compute the inverse on-the-fly.
67 * Tune trial division table size (not forgetting that this is a standalone
68 program where the table will be read from secondary storage for
69 each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #include <gmp.h>
109 #include "system.h"
110 #include "assure.h"
111 #include "full-write.h"
112 #include "quote.h"
113 #include "readtokens.h"
114 #include "xstrtol.h"
116 /* The official name of this program (e.g., no 'g' prefix). */
117 #define PROGRAM_NAME "factor"
119 #define AUTHORS \
120 proper_name ("Paul Rubin"), \
121 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
122 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
124 /* Token delimiters when reading from a file. */
125 #define DELIM "\n\t "
127 #ifndef USE_LONGLONG_H
128 /* With the way we use longlong.h, it's only safe to use
129 when UWtype = UHWtype, as there were various cases
130 (as can be seen in the history for longlong.h) where
131 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
132 to avoid compile time or run time issues. */
133 # if LONG_MAX == INTMAX_MAX
134 # define USE_LONGLONG_H 1
135 # endif
136 #endif
138 #if USE_LONGLONG_H
140 /* Make definitions for longlong.h to make it do what it can do for us */
142 /* bitcount for uintmax_t */
143 # if UINTMAX_MAX == UINT32_MAX
144 # define W_TYPE_SIZE 32
145 # elif UINTMAX_MAX == UINT64_MAX
146 # define W_TYPE_SIZE 64
147 # elif UINTMAX_MAX == UINT128_MAX
148 # define W_TYPE_SIZE 128
149 # endif
151 # define UWtype uintmax_t
152 # define UHWtype unsigned long int
153 # undef UDWtype
154 # if HAVE_ATTRIBUTE_MODE
155 typedef unsigned int UQItype __attribute__ ((mode (QI)));
156 typedef int SItype __attribute__ ((mode (SI)));
157 typedef unsigned int USItype __attribute__ ((mode (SI)));
158 typedef int DItype __attribute__ ((mode (DI)));
159 typedef unsigned int UDItype __attribute__ ((mode (DI)));
160 # else
161 typedef unsigned char UQItype;
162 typedef long SItype;
163 typedef unsigned long int USItype;
164 # if HAVE_LONG_LONG_INT
165 typedef long long int DItype;
166 typedef unsigned long long int UDItype;
167 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
168 typedef long int DItype;
169 typedef unsigned long int UDItype;
170 # endif
171 # endif
172 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
173 # define ASSERT(x) /* FIXME make longlong.h really standalone */
174 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
175 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
176 # ifndef __GMP_GNUC_PREREQ
177 # define __GMP_GNUC_PREREQ(a,b) 1
178 # endif
180 /* These stub macros are only used in longlong.h in certain system compiler
181 combinations, so ensure usage to avoid -Wunused-macros warnings. */
182 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
183 ASSERT (1)
184 __GMP_DECLSPEC
185 # endif
187 # if _ARCH_PPC
188 # define HAVE_HOST_CPU_FAMILY_powerpc 1
189 # endif
190 # include "longlong.h"
191 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
192 const unsigned char factor_clz_tab[129] =
194 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
195 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
196 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
197 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
200 # endif
202 #else /* not USE_LONGLONG_H */
204 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
205 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
206 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
207 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
209 #endif
211 #if !defined __clz_tab && !defined UHWtype
212 /* Without this seemingly useless conditional, gcc -Wunused-macros
213 warns that each of the two tested macros is unused on Fedora 18.
214 FIXME: this is just an ugly band-aid. Fix it properly. */
215 #endif
217 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
218 #define MAX_NFACTS 26
220 enum
222 DEV_DEBUG_OPTION = CHAR_MAX + 1
225 static struct option const long_options[] =
227 {"exponents", no_argument, nullptr, 'h'},
228 {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION},
229 {GETOPT_HELP_OPTION_DECL},
230 {GETOPT_VERSION_OPTION_DECL},
231 {nullptr, 0, nullptr, 0}
234 /* If true, use p^e output format. */
235 static bool print_exponents;
237 struct factors
239 uintmax_t plarge[2]; /* Can have a single large factor */
240 uintmax_t p[MAX_NFACTS];
241 unsigned char e[MAX_NFACTS];
242 unsigned char nfactors;
245 struct mp_factors
247 mpz_t *p;
248 unsigned long int *e;
249 idx_t nfactors;
252 static void factor (uintmax_t, uintmax_t, struct factors *);
254 #ifndef umul_ppmm
255 # define umul_ppmm(w1, w0, u, v) \
256 do { \
257 uintmax_t __x0, __x1, __x2, __x3; \
258 unsigned long int __ul, __vl, __uh, __vh; \
259 uintmax_t __u = (u), __v = (v); \
261 __ul = __ll_lowpart (__u); \
262 __uh = __ll_highpart (__u); \
263 __vl = __ll_lowpart (__v); \
264 __vh = __ll_highpart (__v); \
266 __x0 = (uintmax_t) __ul * __vl; \
267 __x1 = (uintmax_t) __ul * __vh; \
268 __x2 = (uintmax_t) __uh * __vl; \
269 __x3 = (uintmax_t) __uh * __vh; \
271 __x1 += __ll_highpart (__x0);/* This can't give carry. */ \
272 __x1 += __x2; /* But this indeed can. */ \
273 if (__x1 < __x2) /* Did we get it? */ \
274 __x3 += __ll_B; /* Yes, add it in the proper pos. */ \
276 (w1) = __x3 + __ll_highpart (__x1); \
277 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
278 } while (0)
279 #endif
281 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
282 /* Define our own, not needing normalization. This function is
283 currently not performance critical, so keep it simple. Similar to
284 the mod macro below. */
285 # undef udiv_qrnnd
286 # define udiv_qrnnd(q, r, n1, n0, d) \
287 do { \
288 uintmax_t __d1, __d0, __q, __r1, __r0; \
290 __d1 = (d); __d0 = 0; \
291 __r1 = (n1); __r0 = (n0); \
292 affirm (__r1 < __d1); \
293 __q = 0; \
294 for (int __i = W_TYPE_SIZE; __i > 0; __i--) \
296 rsh2 (__d1, __d0, __d1, __d0, 1); \
297 __q <<= 1; \
298 if (ge2 (__r1, __r0, __d1, __d0)) \
300 __q++; \
301 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
304 (r) = __r0; \
305 (q) = __q; \
306 } while (0)
307 #endif
309 #if !defined add_ssaaaa
310 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
311 do { \
312 uintmax_t _add_x; \
313 _add_x = (al) + (bl); \
314 (sh) = (ah) + (bh) + (_add_x < (al)); \
315 (sl) = _add_x; \
316 } while (0)
317 #endif
319 #define rsh2(rh, rl, ah, al, cnt) \
320 do { \
321 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
322 (rh) = (ah) >> (cnt); \
323 } while (0)
325 #define lsh2(rh, rl, ah, al, cnt) \
326 do { \
327 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
328 (rl) = (al) << (cnt); \
329 } while (0)
331 #define ge2(ah, al, bh, bl) \
332 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
334 #define gt2(ah, al, bh, bl) \
335 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
337 #ifndef sub_ddmmss
338 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
339 do { \
340 uintmax_t _cy; \
341 _cy = (al) < (bl); \
342 (rl) = (al) - (bl); \
343 (rh) = (ah) - (bh) - _cy; \
344 } while (0)
345 #endif
347 #ifndef count_leading_zeros
348 # define count_leading_zeros(count, x) do { \
349 uintmax_t __clz_x = (x); \
350 int __clz_c; \
351 for (__clz_c = 0; \
352 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
353 __clz_c += 8) \
354 __clz_x <<= 8; \
355 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
356 __clz_x <<= 1; \
357 (count) = __clz_c; \
358 } while (0)
359 #endif
361 #ifndef count_trailing_zeros
362 # define count_trailing_zeros(count, x) do { \
363 uintmax_t __ctz_x = (x); \
364 int __ctz_c = 0; \
365 while ((__ctz_x & 1) == 0) \
367 __ctz_x >>= 1; \
368 __ctz_c++; \
370 (count) = __ctz_c; \
371 } while (0)
372 #endif
374 /* Requires that a < n and b <= n */
375 #define submod(r,a,b,n) \
376 do { \
377 uintmax_t _t = - (uintmax_t) (a < b); \
378 (r) = ((n) & _t) + (a) - (b); \
379 } while (0)
381 #define addmod(r,a,b,n) \
382 submod ((r), (a), ((n) - (b)), (n))
384 /* Modular two-word addition and subtraction. For performance reasons, the
385 most significant bit of n1 must be clear. The destination variables must be
386 distinct from the mod operand. */
387 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
388 do { \
389 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
390 if (ge2 ((r1), (r0), (n1), (n0))) \
391 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
392 } while (0)
393 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
394 do { \
395 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
396 if ((intmax_t) (r1) < 0) \
397 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
398 } while (0)
400 #define HIGHBIT_TO_MASK(x) \
401 (((intmax_t)-1 >> 1) < 0 \
402 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
403 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
404 ? UINTMAX_MAX : (uintmax_t) 0))
406 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
407 Requires that d1 != 0. */
408 static uintmax_t
409 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
411 int cntd, cnta;
413 affirm (d1 != 0);
415 if (a1 == 0)
417 *r1 = 0;
418 return a0;
421 count_leading_zeros (cntd, d1);
422 count_leading_zeros (cnta, a1);
423 int cnt = cntd - cnta;
424 lsh2 (d1, d0, d1, d0, cnt);
425 for (int i = 0; i < cnt; i++)
427 if (ge2 (a1, a0, d1, d0))
428 sub_ddmmss (a1, a0, a1, a0, d1, d0);
429 rsh2 (d1, d0, d1, d0, 1);
432 *r1 = a1;
433 return a0;
436 ATTRIBUTE_CONST
437 static uintmax_t
438 gcd_odd (uintmax_t a, uintmax_t b)
440 if ((b & 1) == 0)
442 uintmax_t t = b;
443 b = a;
444 a = t;
446 if (a == 0)
447 return b;
449 /* Take out least significant one bit, to make room for sign */
450 b >>= 1;
452 for (;;)
454 uintmax_t t;
455 uintmax_t bgta;
457 while ((a & 1) == 0)
458 a >>= 1;
459 a >>= 1;
461 t = a - b;
462 if (t == 0)
463 return (a << 1) + 1;
465 bgta = HIGHBIT_TO_MASK (t);
467 /* b <-- min (a, b) */
468 b += (bgta & t);
470 /* a <-- |a - b| */
471 a = (t ^ bgta) - bgta;
475 static uintmax_t
476 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
478 affirm (b0 & 1);
480 if ((a0 | a1) == 0)
482 *r1 = b1;
483 return b0;
486 while ((a0 & 1) == 0)
487 rsh2 (a1, a0, a1, a0, 1);
489 for (;;)
491 if ((b1 | a1) == 0)
493 *r1 = 0;
494 return gcd_odd (b0, a0);
497 if (gt2 (a1, a0, b1, b0))
499 sub_ddmmss (a1, a0, a1, a0, b1, b0);
501 rsh2 (a1, a0, a1, a0, 1);
502 while ((a0 & 1) == 0);
504 else if (gt2 (b1, b0, a1, a0))
506 sub_ddmmss (b1, b0, b1, b0, a1, a0);
508 rsh2 (b1, b0, b1, b0, 1);
509 while ((b0 & 1) == 0);
511 else
512 break;
515 *r1 = a1;
516 return a0;
519 static void
520 factor_insert_multiplicity (struct factors *factors,
521 uintmax_t prime, int m)
523 int nfactors = factors->nfactors;
524 uintmax_t *p = factors->p;
525 unsigned char *e = factors->e;
527 /* Locate position for insert new or increment e. */
528 int i;
529 for (i = nfactors - 1; i >= 0; i--)
531 if (p[i] <= prime)
532 break;
535 if (i < 0 || p[i] != prime)
537 for (int j = nfactors - 1; j > i; j--)
539 p[j + 1] = p[j];
540 e[j + 1] = e[j];
542 p[i + 1] = prime;
543 e[i + 1] = m;
544 factors->nfactors = nfactors + 1;
546 else
548 e[i] += m;
552 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
554 static void
555 factor_insert_large (struct factors *factors,
556 uintmax_t p1, uintmax_t p0)
558 if (p1 > 0)
560 affirm (factors->plarge[1] == 0);
561 factors->plarge[0] = p0;
562 factors->plarge[1] = p1;
564 else
565 factor_insert (factors, p0);
568 #ifndef mpz_inits
570 # include <stdarg.h>
572 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
573 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
575 static void
576 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
578 va_list ap;
580 va_start (ap, mpz_single_init);
582 mpz_t *mpz;
583 while ((mpz = va_arg (ap, mpz_t *)))
584 mpz_single_init (*mpz);
586 va_end (ap);
588 #endif
590 static void mp_factor (mpz_t, struct mp_factors *);
592 static void
593 mp_factor_init (struct mp_factors *factors)
595 factors->p = nullptr;
596 factors->e = nullptr;
597 factors->nfactors = 0;
600 static void
601 mp_factor_clear (struct mp_factors *factors)
603 for (idx_t i = 0; i < factors->nfactors; i++)
604 mpz_clear (factors->p[i]);
606 free (factors->p);
607 free (factors->e);
610 static void
611 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
613 idx_t nfactors = factors->nfactors;
614 mpz_t *p = factors->p;
615 unsigned long int *e = factors->e;
616 ptrdiff_t i;
618 /* Locate position for insert new or increment e. */
619 for (i = nfactors - 1; i >= 0; i--)
621 if (mpz_cmp (p[i], prime) <= 0)
622 break;
625 if (i < 0 || mpz_cmp (p[i], prime) != 0)
627 p = xireallocarray (p, nfactors + 1, sizeof p[0]);
628 e = xireallocarray (e, nfactors + 1, sizeof e[0]);
630 mpz_init (p[nfactors]);
631 for (long j = nfactors - 1; j > i; j--)
633 mpz_set (p[j + 1], p[j]);
634 e[j + 1] = e[j];
636 mpz_set (p[i + 1], prime);
637 e[i + 1] = 1;
639 factors->p = p;
640 factors->e = e;
641 factors->nfactors = nfactors + 1;
643 else
645 e[i] += 1;
649 static void
650 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
652 mpz_t pz;
654 mpz_init_set_ui (pz, prime);
655 mp_factor_insert (factors, pz);
656 mpz_clear (pz);
660 /* Number of bits in an uintmax_t. */
661 enum { W = sizeof (uintmax_t) * CHAR_BIT };
663 /* Verify that uintmax_t does not have holes in its representation. */
664 static_assert (UINTMAX_MAX >> (W - 1) == 1);
666 #define P(a,b,c,d) a,
667 static const unsigned char primes_diff[] = {
668 #include "primes.h"
669 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
671 #undef P
673 #define PRIMES_PTAB_ENTRIES \
674 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
676 #define P(a,b,c,d) b,
677 static const unsigned char primes_diff8[] = {
678 #include "primes.h"
679 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
681 #undef P
683 struct primes_dtab
685 uintmax_t binv, lim;
688 #define P(a,b,c,d) {c,d},
689 static const struct primes_dtab primes_dtab[] = {
690 #include "primes.h"
691 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
693 #undef P
695 /* Verify that uintmax_t is not wider than
696 the integers used to generate primes.h. */
697 static_assert (W <= WIDE_UINT_BITS);
699 /* debugging for developers. Enables devmsg().
700 This flag is used only in the GMP code. */
701 static bool dev_debug = false;
703 /* Prove primality or run probabilistic tests. */
704 static bool flag_prove_primality = PROVE_PRIMALITY;
706 /* Number of Miller-Rabin tests to run when not proving primality. */
707 #define MR_REPS 25
709 static void
710 factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off)
712 for (int j = 0; j < off; j++)
713 p += primes_diff[i + j];
714 factor_insert (factors, p);
717 /* Trial division with odd primes uses the following trick.
719 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
720 consider the case t < B (this is the second loop below).
722 From our tables we get
724 binv = p^{-1} (mod B)
725 lim = floor ((B-1) / p).
727 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
728 (and all quotients in this range occur for some t).
730 Then t = q * p is true also (mod B), and p is invertible we get
732 q = t * binv (mod B).
734 Next, assume that t is *not* divisible by p. Since multiplication
735 by binv (mod B) is a one-to-one mapping,
737 t * binv (mod B) > lim,
739 because all the smaller values are already taken.
741 This can be summed up by saying that the function
743 q(t) = binv * t (mod B)
745 is a permutation of the range 0 <= t < B, with the curious property
746 that it maps the multiples of p onto the range 0 <= q <= lim, in
747 order, and the non-multiples of p onto the range lim < q < B.
750 static uintmax_t
751 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
752 struct factors *factors)
754 if (t0 % 2 == 0)
756 int cnt;
758 if (t0 == 0)
760 count_trailing_zeros (cnt, t1);
761 t0 = t1 >> cnt;
762 t1 = 0;
763 cnt += W_TYPE_SIZE;
765 else
767 count_trailing_zeros (cnt, t0);
768 rsh2 (t1, t0, t1, t0, cnt);
771 factor_insert_multiplicity (factors, 2, cnt);
774 uintmax_t p = 3;
775 idx_t i;
776 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
778 for (;;)
780 uintmax_t q1, q0, hi;
781 MAYBE_UNUSED uintmax_t lo;
783 q0 = t0 * primes_dtab[i].binv;
784 umul_ppmm (hi, lo, q0, p);
785 if (hi > t1)
786 break;
787 hi = t1 - hi;
788 q1 = hi * primes_dtab[i].binv;
789 if (LIKELY (q1 > primes_dtab[i].lim))
790 break;
791 t1 = q1; t0 = q0;
792 factor_insert (factors, p);
794 p += primes_diff[i + 1];
796 if (t1p)
797 *t1p = t1;
799 #define DIVBLOCK(I) \
800 do { \
801 for (;;) \
803 q = t0 * pd[I].binv; \
804 if (LIKELY (q > pd[I].lim)) \
805 break; \
806 t0 = q; \
807 factor_insert_refind (factors, p, i + 1, I); \
809 } while (0)
811 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
813 uintmax_t q;
814 const struct primes_dtab *pd = &primes_dtab[i];
815 DIVBLOCK (0);
816 DIVBLOCK (1);
817 DIVBLOCK (2);
818 DIVBLOCK (3);
819 DIVBLOCK (4);
820 DIVBLOCK (5);
821 DIVBLOCK (6);
822 DIVBLOCK (7);
824 p += primes_diff8[i];
825 if (p * p > t0)
826 break;
829 return t0;
832 static void
833 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
835 mpz_t q;
836 mp_bitcnt_t p;
838 devmsg ("[trial division] ");
840 mpz_init (q);
842 p = mpz_scan1 (t, 0);
843 mpz_fdiv_q_2exp (t, t, p);
844 while (p)
846 mp_factor_insert_ui (factors, 2);
847 --p;
850 unsigned long int d = 3;
851 for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;)
853 if (! mpz_divisible_ui_p (t, d))
855 d += primes_diff[i++];
856 if (mpz_cmp_ui (t, d * d) < 0)
857 break;
859 else
861 mpz_tdiv_q_ui (t, t, d);
862 mp_factor_insert_ui (factors, d);
866 mpz_clear (q);
869 /* Entry i contains (2i+1)^(-1) mod 2^8. */
870 static const unsigned char binvert_table[128] =
872 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
873 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
874 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
875 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
876 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
877 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
878 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
879 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
880 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
881 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
882 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
883 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
884 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
885 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
886 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
887 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
890 /* Compute n^(-1) mod B, using a Newton iteration. */
891 #define binv(inv,n) \
892 do { \
893 uintmax_t __n = (n); \
894 uintmax_t __inv; \
896 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
897 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
898 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
899 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
901 if (W_TYPE_SIZE > 64) \
903 int __invbits = 64; \
904 do { \
905 __inv = 2 * __inv - __inv * __inv * __n; \
906 __invbits *= 2; \
907 } while (__invbits < W_TYPE_SIZE); \
910 (inv) = __inv; \
911 } while (0)
913 /* q = u / d, assuming d|u. */
914 #define divexact_21(q1, q0, u1, u0, d) \
915 do { \
916 uintmax_t _di, _q0; \
917 binv (_di, (d)); \
918 _q0 = (u0) * _di; \
919 if ((u1) >= (d)) \
921 uintmax_t _p1; \
922 MAYBE_UNUSED intmax_t _p0; \
923 umul_ppmm (_p1, _p0, _q0, d); \
924 (q1) = ((u1) - _p1) * _di; \
925 (q0) = _q0; \
927 else \
929 (q0) = _q0; \
930 (q1) = 0; \
932 } while (0)
934 /* x B (mod n). */
935 #define redcify(r_prim, r, n) \
936 do { \
937 MAYBE_UNUSED uintmax_t _redcify_q; \
938 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
939 } while (0)
941 /* x B^2 (mod n). Requires x > 0, n1 < B/2. */
942 #define redcify2(r1, r0, x, n1, n0) \
943 do { \
944 uintmax_t _r1, _r0, _i; \
945 if ((x) < (n1)) \
947 _r1 = (x); _r0 = 0; \
948 _i = W_TYPE_SIZE; \
950 else \
952 _r1 = 0; _r0 = (x); \
953 _i = 2 * W_TYPE_SIZE; \
955 while (_i-- > 0) \
957 lsh2 (_r1, _r0, _r1, _r0, 1); \
958 if (ge2 (_r1, _r0, (n1), (n0))) \
959 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
961 (r1) = _r1; \
962 (r0) = _r0; \
963 } while (0)
965 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
966 Both a and b must be in redc form, the result will be in redc form too. */
967 static inline uintmax_t
968 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
970 uintmax_t rh, rl, q, th, xh;
971 MAYBE_UNUSED uintmax_t tl;
973 umul_ppmm (rh, rl, a, b);
974 q = rl * mi;
975 umul_ppmm (th, tl, q, m);
976 xh = rh - th;
977 if (rh < th)
978 xh += m;
980 return xh;
983 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
984 Both a and b must be in redc form, the result will be in redc form too.
985 For performance reasons, the most significant bit of m must be clear. */
986 static uintmax_t
987 mulredc2 (uintmax_t *r1p,
988 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
989 uintmax_t m1, uintmax_t m0, uintmax_t mi)
991 uintmax_t r1, r0, q, p1, t1, t0, s1, s0;
992 MAYBE_UNUSED uintmax_t p0;
993 mi = -mi;
994 affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0);
995 affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0);
996 affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0);
998 /* First compute a0 * <b1, b0> B^{-1}
999 +-----+
1000 |a0 b0|
1001 +--+--+--+
1002 |a0 b1|
1003 +--+--+--+
1004 |q0 m0|
1005 +--+--+--+
1006 |q0 m1|
1007 -+--+--+--+
1008 |r1|r0| 0|
1009 +--+--+--+
1011 umul_ppmm (t1, t0, a0, b0);
1012 umul_ppmm (r1, r0, a0, b1);
1013 q = mi * t0;
1014 umul_ppmm (p1, p0, q, m0);
1015 umul_ppmm (s1, s0, q, m1);
1016 r0 += (t0 != 0); /* Carry */
1017 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1018 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1019 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1021 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1022 +-----+
1023 |a1 b0|
1024 +--+--+
1025 |r1|r0|
1026 +--+--+--+
1027 |a1 b1|
1028 +--+--+--+
1029 |q1 m0|
1030 +--+--+--+
1031 |q1 m1|
1032 -+--+--+--+
1033 |r1|r0| 0|
1034 +--+--+--+
1036 umul_ppmm (t1, t0, a1, b0);
1037 umul_ppmm (s1, s0, a1, b1);
1038 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1039 q = mi * t0;
1040 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1041 umul_ppmm (p1, p0, q, m0);
1042 umul_ppmm (s1, s0, q, m1);
1043 r0 += (t0 != 0); /* Carry */
1044 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1045 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1046 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1048 if (ge2 (r1, r0, m1, m0))
1049 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1051 *r1p = r1;
1052 return r0;
1055 ATTRIBUTE_CONST
1056 static uintmax_t
1057 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1059 uintmax_t y = one;
1061 if (e & 1)
1062 y = b;
1064 while (e != 0)
1066 b = mulredc (b, b, n, ni);
1067 e >>= 1;
1069 if (e & 1)
1070 y = mulredc (y, b, n, ni);
1073 return y;
1076 static uintmax_t
1077 powm2 (uintmax_t *r1m,
1078 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1079 uintmax_t ni, const uintmax_t *one)
1081 uintmax_t r1, r0, b1, b0, n1, n0;
1082 int i;
1083 uintmax_t e;
1085 b0 = bp[0];
1086 b1 = bp[1];
1087 n0 = np[0];
1088 n1 = np[1];
1090 r0 = one[0];
1091 r1 = one[1];
1093 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1095 if (e & 1)
1097 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1098 r1 = *r1m;
1100 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1101 b1 = *r1m;
1103 for (e = ep[1]; e > 0; e >>= 1)
1105 if (e & 1)
1107 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1108 r1 = *r1m;
1110 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1111 b1 = *r1m;
1113 *r1m = r1;
1114 return r0;
1117 ATTRIBUTE_CONST
1118 static bool
1119 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1120 int k, uintmax_t one)
1122 uintmax_t y = powm (b, q, n, ni, one);
1124 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1126 if (y == one || y == nm1)
1127 return true;
1129 for (int i = 1; i < k; i++)
1131 y = mulredc (y, y, n, ni);
1133 if (y == nm1)
1134 return true;
1135 if (y == one)
1136 return false;
1138 return false;
1141 ATTRIBUTE_PURE static bool
1142 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1143 const uintmax_t *qp, int k, const uintmax_t *one)
1145 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1147 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1148 y1 = r1m;
1150 if (y0 == one[0] && y1 == one[1])
1151 return true;
1153 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1155 if (y0 == nm1_0 && y1 == nm1_1)
1156 return true;
1158 for (int i = 1; i < k; i++)
1160 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1161 y1 = r1m;
1163 if (y0 == nm1_0 && y1 == nm1_1)
1164 return true;
1165 if (y0 == one[0] && y1 == one[1])
1166 return false;
1168 return false;
1171 static bool
1172 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1173 mpz_srcptr q, mp_bitcnt_t k)
1175 mpz_powm (y, x, q, n);
1177 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1178 return true;
1180 for (mp_bitcnt_t i = 1; i < k; i++)
1182 mpz_powm_ui (y, y, 2, n);
1183 if (mpz_cmp (y, nm1) == 0)
1184 return true;
1185 if (mpz_cmp_ui (y, 1) == 0)
1186 return false;
1188 return false;
1191 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1192 have been observed. The average seem to be about 2. */
1193 static bool ATTRIBUTE_PURE
1194 prime_p (uintmax_t n)
1196 mp_bitcnt_t k;
1197 bool is_prime;
1198 uintmax_t a_prim, one, ni;
1199 struct factors factors;
1201 if (n <= 1)
1202 return false;
1204 /* We have already casted out small primes. */
1205 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1206 return true;
1208 /* Precomputation for Miller-Rabin. */
1209 uintmax_t q = n - 1;
1210 for (k = 0; (q & 1) == 0; k++)
1211 q >>= 1;
1213 uintmax_t a = 2;
1214 binv (ni, n); /* ni <- 1/n mod B */
1215 redcify (one, 1, n);
1216 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1218 /* Perform a Miller-Rabin test, finds most composites quickly. */
1219 if (!millerrabin (n, ni, a_prim, q, k, one))
1220 return false;
1222 if (flag_prove_primality)
1224 /* Factor n-1 for Lucas. */
1225 factor (0, n - 1, &factors);
1228 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1229 number composite. */
1230 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1232 if (flag_prove_primality)
1234 is_prime = true;
1235 for (int i = 0; i < factors.nfactors && is_prime; i++)
1237 is_prime
1238 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1241 else
1243 /* After enough Miller-Rabin runs, be content. */
1244 is_prime = (r == MR_REPS - 1);
1247 if (is_prime)
1248 return true;
1250 a += primes_diff[r]; /* Establish new base. */
1252 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1253 on most processors, since it avoids udiv_qrnnd. If we go down the
1254 udiv_qrnnd_preinv path, this code should be replaced. */
1256 uintmax_t s1, s0;
1257 umul_ppmm (s1, s0, one, a);
1258 if (LIKELY (s1 == 0))
1259 a_prim = s0 % n;
1260 else
1262 MAYBE_UNUSED uintmax_t dummy;
1263 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1267 if (!millerrabin (n, ni, a_prim, q, k, one))
1268 return false;
1271 affirm (!"Lucas prime test failure. This should not happen");
1274 static bool ATTRIBUTE_PURE
1275 prime2_p (uintmax_t n1, uintmax_t n0)
1277 uintmax_t q[2], nm1[2];
1278 uintmax_t a_prim[2];
1279 uintmax_t one[2];
1280 uintmax_t na[2];
1281 uintmax_t ni;
1282 int k;
1283 struct factors factors;
1285 if (n1 == 0)
1286 return prime_p (n0);
1288 nm1[1] = n1 - (n0 == 0);
1289 nm1[0] = n0 - 1;
1290 if (nm1[0] == 0)
1292 count_trailing_zeros (k, nm1[1]);
1294 q[0] = nm1[1] >> k;
1295 q[1] = 0;
1296 k += W_TYPE_SIZE;
1298 else
1300 count_trailing_zeros (k, nm1[0]);
1301 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1304 uintmax_t a = 2;
1305 binv (ni, n0);
1306 redcify2 (one[1], one[0], 1, n1, n0);
1307 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1309 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1310 na[0] = n0;
1311 na[1] = n1;
1313 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1314 return false;
1316 if (flag_prove_primality)
1318 /* Factor n-1 for Lucas. */
1319 factor (nm1[1], nm1[0], &factors);
1322 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1323 number composite. */
1324 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1326 bool is_prime;
1327 uintmax_t e[2], y[2];
1329 if (flag_prove_primality)
1331 is_prime = true;
1332 if (factors.plarge[1])
1334 uintmax_t pi;
1335 binv (pi, factors.plarge[0]);
1336 e[0] = pi * nm1[0];
1337 e[1] = 0;
1338 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1339 is_prime = (y[0] != one[0] || y[1] != one[1]);
1341 for (int i = 0; i < factors.nfactors && is_prime; i++)
1343 /* FIXME: We always have the factor 2. Do we really need to
1344 handle it here? We have done the same powering as part
1345 of millerrabin. */
1346 if (factors.p[i] == 2)
1347 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1348 else
1349 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1350 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1351 is_prime = (y[0] != one[0] || y[1] != one[1]);
1354 else
1356 /* After enough Miller-Rabin runs, be content. */
1357 is_prime = (r == MR_REPS - 1);
1360 if (is_prime)
1361 return true;
1363 a += primes_diff[r]; /* Establish new base. */
1364 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1366 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1367 return false;
1370 affirm (!"Lucas prime test failure. This should not happen");
1373 static bool
1374 mp_prime_p (mpz_t n)
1376 bool is_prime;
1377 mpz_t q, a, nm1, tmp;
1378 struct mp_factors factors;
1380 if (mpz_cmp_ui (n, 1) <= 0)
1381 return false;
1383 /* We have already casted out small primes. */
1384 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1385 return true;
1387 mpz_inits (q, a, nm1, tmp, nullptr);
1389 /* Precomputation for Miller-Rabin. */
1390 mpz_sub_ui (nm1, n, 1);
1392 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1393 mp_bitcnt_t k = mpz_scan1 (nm1, 0);
1394 mpz_tdiv_q_2exp (q, nm1, k);
1396 mpz_set_ui (a, 2);
1398 /* Perform a Miller-Rabin test, finds most composites quickly. */
1399 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1401 is_prime = false;
1402 goto ret2;
1405 if (flag_prove_primality)
1407 /* Factor n-1 for Lucas. */
1408 mpz_set (tmp, nm1);
1409 mp_factor (tmp, &factors);
1412 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1413 number composite. */
1414 for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1416 if (flag_prove_primality)
1418 is_prime = true;
1419 for (idx_t i = 0; i < factors.nfactors && is_prime; i++)
1421 mpz_divexact (tmp, nm1, factors.p[i]);
1422 mpz_powm (tmp, a, tmp, n);
1423 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1426 else
1428 /* After enough Miller-Rabin runs, be content. */
1429 is_prime = (r == MR_REPS - 1);
1432 if (is_prime)
1433 goto ret1;
1435 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1437 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1439 is_prime = false;
1440 goto ret1;
1444 affirm (!"Lucas prime test failure. This should not happen");
1446 ret1:
1447 if (flag_prove_primality)
1448 mp_factor_clear (&factors);
1449 ret2:
1450 mpz_clears (q, a, nm1, tmp, nullptr);
1452 return is_prime;
1455 static void
1456 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1457 struct factors *factors)
1459 uintmax_t x, z, y, P, t, ni, g;
1461 unsigned long int k = 1;
1462 unsigned long int l = 1;
1464 redcify (P, 1, n);
1465 addmod (x, P, P, n); /* i.e., redcify(2) */
1466 y = z = x;
1468 while (n != 1)
1470 affirm (a < n);
1472 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1474 for (;;)
1478 x = mulredc (x, x, n, ni);
1479 addmod (x, x, a, n);
1481 submod (t, z, x, n);
1482 P = mulredc (P, t, n, ni);
1484 if (k % 32 == 1)
1486 if (gcd_odd (P, n) != 1)
1487 goto factor_found;
1488 y = x;
1491 while (--k != 0);
1493 z = x;
1494 k = l;
1495 l = 2 * l;
1496 for (unsigned long int i = 0; i < k; i++)
1498 x = mulredc (x, x, n, ni);
1499 addmod (x, x, a, n);
1501 y = x;
1504 factor_found:
1507 y = mulredc (y, y, n, ni);
1508 addmod (y, y, a, n);
1510 submod (t, z, y, n);
1511 g = gcd_odd (t, n);
1513 while (g == 1);
1515 if (n == g)
1517 /* Found n itself as factor. Restart with different params. */
1518 factor_using_pollard_rho (n, a + 1, factors);
1519 return;
1522 n = n / g;
1524 if (!prime_p (g))
1525 factor_using_pollard_rho (g, a + 1, factors);
1526 else
1527 factor_insert (factors, g);
1529 if (prime_p (n))
1531 factor_insert (factors, n);
1532 break;
1535 x = x % n;
1536 z = z % n;
1537 y = y % n;
1541 static void
1542 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1543 struct factors *factors)
1545 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1547 unsigned long int k = 1;
1548 unsigned long int l = 1;
1550 redcify2 (P1, P0, 1, n1, n0);
1551 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1552 y1 = z1 = x1;
1553 y0 = z0 = x0;
1555 while (n1 != 0 || n0 != 1)
1557 binv (ni, n0);
1559 for (;;)
1563 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1564 x1 = r1m;
1565 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1567 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1568 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1569 P1 = r1m;
1571 if (k % 32 == 1)
1573 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1574 if (g1 != 0 || g0 != 1)
1575 goto factor_found;
1576 y1 = x1; y0 = x0;
1579 while (--k != 0);
1581 z1 = x1; z0 = x0;
1582 k = l;
1583 l = 2 * l;
1584 for (unsigned long int i = 0; i < k; i++)
1586 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1587 x1 = r1m;
1588 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1590 y1 = x1; y0 = x0;
1593 factor_found:
1596 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1597 y1 = r1m;
1598 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1600 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1601 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1603 while (g1 == 0 && g0 == 1);
1605 if (g1 == 0)
1607 /* The found factor is one word, and > 1. */
1608 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1610 if (!prime_p (g0))
1611 factor_using_pollard_rho (g0, a + 1, factors);
1612 else
1613 factor_insert (factors, g0);
1615 else
1617 /* The found factor is two words. This is highly unlikely, thus hard
1618 to trigger. Please be careful before you change this code! */
1619 uintmax_t ginv;
1621 if (n1 == g1 && n0 == g0)
1623 /* Found n itself as factor. Restart with different params. */
1624 factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1625 return;
1628 /* Compute n = n / g. Since the result will fit one word,
1629 we can compute the quotient modulo B, ignoring the high
1630 divisor word. */
1631 binv (ginv, g0);
1632 n0 = ginv * n0;
1633 n1 = 0;
1635 if (!prime2_p (g1, g0))
1636 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1637 else
1638 factor_insert_large (factors, g1, g0);
1641 if (n1 == 0)
1643 if (prime_p (n0))
1645 factor_insert (factors, n0);
1646 break;
1649 factor_using_pollard_rho (n0, a, factors);
1650 return;
1653 if (prime2_p (n1, n0))
1655 factor_insert_large (factors, n1, n0);
1656 break;
1659 x0 = mod2 (&x1, x1, x0, n1, n0);
1660 z0 = mod2 (&z1, z1, z0, n1, n0);
1661 y0 = mod2 (&y1, y1, y0, n1, n0);
1665 static void
1666 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1667 struct mp_factors *factors)
1669 mpz_t x, z, y, P;
1670 mpz_t t, t2;
1672 devmsg ("[pollard-rho (%lu)] ", a);
1674 mpz_inits (t, t2, nullptr);
1675 mpz_init_set_si (y, 2);
1676 mpz_init_set_si (x, 2);
1677 mpz_init_set_si (z, 2);
1678 mpz_init_set_ui (P, 1);
1680 unsigned long long int k = 1;
1681 unsigned long long int l = 1;
1683 while (mpz_cmp_ui (n, 1) != 0)
1685 for (;;)
1689 mpz_mul (t, x, x);
1690 mpz_mod (x, t, n);
1691 mpz_add_ui (x, x, a);
1693 mpz_sub (t, z, x);
1694 mpz_mul (t2, P, t);
1695 mpz_mod (P, t2, n);
1697 if (k % 32 == 1)
1699 mpz_gcd (t, P, n);
1700 if (mpz_cmp_ui (t, 1) != 0)
1701 goto factor_found;
1702 mpz_set (y, x);
1705 while (--k != 0);
1707 mpz_set (z, x);
1708 k = l;
1709 l = 2 * l;
1710 for (unsigned long long int i = 0; i < k; i++)
1712 mpz_mul (t, x, x);
1713 mpz_mod (x, t, n);
1714 mpz_add_ui (x, x, a);
1716 mpz_set (y, x);
1719 factor_found:
1722 mpz_mul (t, y, y);
1723 mpz_mod (y, t, n);
1724 mpz_add_ui (y, y, a);
1726 mpz_sub (t, z, y);
1727 mpz_gcd (t, t, n);
1729 while (mpz_cmp_ui (t, 1) == 0);
1731 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1733 if (!mp_prime_p (t))
1735 devmsg ("[composite factor--restarting pollard-rho] ");
1736 mp_factor_using_pollard_rho (t, a + 1, factors);
1738 else
1740 mp_factor_insert (factors, t);
1743 if (mp_prime_p (n))
1745 mp_factor_insert (factors, n);
1746 break;
1749 mpz_mod (x, x, n);
1750 mpz_mod (z, z, n);
1751 mpz_mod (y, y, n);
1754 mpz_clears (P, t2, t, z, x, y, nullptr);
1757 #if USE_SQUFOF
1758 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1759 algorithm is replaced, consider also returning the remainder. */
1760 ATTRIBUTE_CONST
1761 static uintmax_t
1762 isqrt (uintmax_t n)
1764 uintmax_t x;
1765 int c;
1766 if (n == 0)
1767 return 0;
1769 count_leading_zeros (c, n);
1771 /* Make x > sqrt(n). This will be invariant through the loop. */
1772 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
1774 for (;;)
1776 uintmax_t y = (x + n / x) / 2;
1777 if (y >= x)
1778 return x;
1780 x = y;
1784 ATTRIBUTE_CONST
1785 static uintmax_t
1786 isqrt2 (uintmax_t nh, uintmax_t nl)
1788 int shift;
1789 uintmax_t x;
1791 /* Ensures the remainder fits in an uintmax_t. */
1792 affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1794 if (nh == 0)
1795 return isqrt (nl);
1797 count_leading_zeros (shift, nh);
1798 shift &= ~1;
1800 /* Make x > sqrt (n). */
1801 x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1802 x <<= (W_TYPE_SIZE - shift) >> 1;
1804 /* Do we need more than one iteration? */
1805 for (;;)
1807 MAYBE_UNUSED uintmax_t r;
1808 uintmax_t q, y;
1809 udiv_qrnnd (q, r, nh, nl, x);
1810 y = (x + q) / 2;
1812 if (y >= x)
1814 uintmax_t hi, lo;
1815 umul_ppmm (hi, lo, x + 1, x + 1);
1816 affirm (gt2 (hi, lo, nh, nl));
1818 umul_ppmm (hi, lo, x, x);
1819 affirm (ge2 (nh, nl, hi, lo));
1820 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1821 affirm (hi == 0);
1823 return x;
1826 x = y;
1830 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1831 # define MAGIC64 0x0202021202030213ULL
1832 # define MAGIC63 0x0402483012450293ULL
1833 # define MAGIC65 0x218a019866014613ULL
1834 # define MAGIC11 0x23b
1836 /* Return the square root if the input is a square, otherwise 0. */
1837 ATTRIBUTE_CONST
1838 static uintmax_t
1839 is_square (uintmax_t x)
1841 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1842 computing the square root. */
1843 if (((MAGIC64 >> (x & 63)) & 1)
1844 && ((MAGIC63 >> (x % 63)) & 1)
1845 /* Both 0 and 64 are squares mod (65). */
1846 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1847 && ((MAGIC11 >> (x % 11) & 1)))
1849 uintmax_t r = isqrt (x);
1850 if (r * r == x)
1851 return r;
1853 return 0;
1856 /* invtab[i] = floor (0x10000 / (0x100 + i) */
1857 static short const invtab[0x81] =
1859 0x200,
1860 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1861 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1862 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1863 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1864 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1865 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1866 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1867 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1868 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1869 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1870 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1871 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1872 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1873 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1874 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1875 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1878 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1879 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1880 # define div_smallq(q, r, u, d) \
1881 do { \
1882 if ((u) / 0x40 < (d)) \
1884 int _cnt; \
1885 uintmax_t _dinv, _mask, _q, _r; \
1886 count_leading_zeros (_cnt, (d)); \
1887 _r = (u); \
1888 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1890 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1891 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1893 else \
1895 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1896 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1898 _r -= _q * (d); \
1900 _mask = -(uintmax_t) (_r >= (d)); \
1901 (r) = _r - (_mask & (d)); \
1902 (q) = _q - _mask; \
1903 affirm ((q) * (d) + (r) == u); \
1905 else \
1907 uintmax_t _q = (u) / (d); \
1908 (r) = (u) - _q * (d); \
1909 (q) = _q; \
1911 } while (0)
1913 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1914 = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
1915 with 3025 = 55^2.
1917 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1918 representing G_0 = (-55, 2 * 4652, 8653).
1920 In the notation of the paper:
1922 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1926 t_0 = floor([q_0 + R_0] / S0) = 1
1927 R_1 = t_0 * S_0 - R_0 = 4001
1928 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1931 /* Multipliers, in order of efficiency:
1932 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1933 0.7317 3*5*7 = 105 = 1
1934 0.7820 3*5*11 = 165 = 1
1935 0.7872 3*5 = 15 = 3
1936 0.8101 3*7*11 = 231 = 3
1937 0.8155 3*7 = 21 = 1
1938 0.8284 5*7*11 = 385 = 1
1939 0.8339 5*7 = 35 = 3
1940 0.8716 3*11 = 33 = 1
1941 0.8774 3 = 3 = 3
1942 0.8913 5*11 = 55 = 3
1943 0.8972 5 = 5 = 1
1944 0.9233 7*11 = 77 = 1
1945 0.9295 7 = 7 = 3
1946 0.9934 11 = 11 = 3
1948 # define QUEUE_SIZE 50
1949 #endif
1951 #if STAT_SQUFOF
1952 # define Q_FREQ_SIZE 50
1953 /* Element 0 keeps the total */
1954 static int q_freq[Q_FREQ_SIZE + 1];
1955 #endif
1957 #if USE_SQUFOF
1958 /* Return true on success. Expected to fail only for numbers
1959 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1960 static bool
1961 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1963 /* Uses algorithm and notation from
1965 SQUARE FORM FACTORIZATION
1966 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1968 https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1971 static short const multipliers_1[] =
1972 { /* = 1 (mod 4) */
1973 105, 165, 21, 385, 33, 5, 77, 1, 0
1975 static short const multipliers_3[] =
1976 { /* = 3 (mod 4) */
1977 1155, 15, 231, 35, 3, 55, 7, 11, 0
1980 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1982 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1983 return false;
1985 uintmax_t sqrt_n = isqrt2 (n1, n0);
1987 if (n0 == sqrt_n * sqrt_n)
1989 uintmax_t p1, p0;
1991 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1992 affirm (p0 == n0);
1994 if (n1 == p1)
1996 if (prime_p (sqrt_n))
1997 factor_insert_multiplicity (factors, sqrt_n, 2);
1998 else
2000 struct factors f;
2002 f.nfactors = 0;
2003 if (!factor_using_squfof (0, sqrt_n, &f))
2005 /* Try pollard rho instead */
2006 factor_using_pollard_rho (sqrt_n, 1, &f);
2008 /* Duplicate the new factors */
2009 for (unsigned int i = 0; i < f.nfactors; i++)
2010 factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
2012 return true;
2016 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2017 for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2018 *m; m++)
2020 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2021 unsigned int i;
2022 unsigned int mu = *m;
2023 int qpos = 0;
2025 affirm (mu * n0 % 4 == 3);
2027 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2028 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2029 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2030 mu^3 < n.
2032 However, this seems insufficient: With n = 37243139 and mu =
2033 105, we get a trivial factor, from the square 38809 = 197^2,
2034 without any corresponding Q earlier in the iteration.
2036 Requiring 64 mu^3 < n seems sufficient. */
2037 if (n1 == 0)
2039 if ((uintmax_t) mu * mu * mu >= n0 / 64)
2040 continue;
2042 else
2044 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2045 continue;
2047 umul_ppmm (Dh, Dl, n0, mu);
2048 Dh += n1 * mu;
2050 affirm (Dl % 4 != 1);
2051 affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2053 S = isqrt2 (Dh, Dl);
2055 Q1 = 1;
2056 P = S;
2058 /* Square root remainder fits in one word, so ignore high part. */
2059 Q = Dl - P * P;
2060 /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
2061 L = isqrt (2 * S);
2062 B = 2 * L;
2063 L1 = mu * 2 * L;
2065 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2066 4 D. */
2068 for (i = 0; i <= B; i++)
2070 uintmax_t q, P1, t, rem;
2072 div_smallq (q, rem, S + P, Q);
2073 P1 = S - rem; /* P1 = q*Q - P */
2075 affirm (q > 0 && Q > 0);
2077 # if STAT_SQUFOF
2078 q_freq[0]++;
2079 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2080 # endif
2082 if (Q <= L1)
2084 uintmax_t g = Q;
2086 if ((Q & 1) == 0)
2087 g /= 2;
2089 g /= gcd_odd (g, mu);
2091 if (g <= L)
2093 if (qpos >= QUEUE_SIZE)
2094 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2095 queue[qpos].Q = g;
2096 queue[qpos].P = P % g;
2097 qpos++;
2101 /* I think the difference can be either sign, but mod
2102 2^W_TYPE_SIZE arithmetic should be fine. */
2103 t = Q1 + q * (P - P1);
2104 Q1 = Q;
2105 Q = t;
2106 P = P1;
2108 if ((i & 1) == 0)
2110 uintmax_t r = is_square (Q);
2111 if (r)
2113 for (int j = 0; j < qpos; j++)
2115 if (queue[j].Q == r)
2117 if (r == 1)
2118 /* Traversed entire cycle. */
2119 goto next_multiplier;
2121 /* Need the absolute value for divisibility test. */
2122 if (P >= queue[j].P)
2123 t = P - queue[j].P;
2124 else
2125 t = queue[j].P - P;
2126 if (t % r == 0)
2128 /* Delete entries up to and including entry
2129 j, which matched. */
2130 memmove (queue, queue + j + 1,
2131 (qpos - j - 1) * sizeof (queue[0]));
2132 qpos -= (j + 1);
2134 goto next_i;
2138 /* We have found a square form, which should give a
2139 factor. */
2140 Q1 = r;
2141 affirm (S >= P); /* What signs are possible? */
2142 P += r * ((S - P) / r);
2144 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2145 for the case D = 2N. */
2146 /* Compute Q = (D - P*P) / Q1, but we need double
2147 precision. */
2148 uintmax_t hi, lo;
2149 umul_ppmm (hi, lo, P, P);
2150 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2151 udiv_qrnnd (Q, rem, hi, lo, Q1);
2152 affirm (rem == 0);
2154 for (;;)
2156 /* Note: There appears to by a typo in the paper,
2157 Step 4a in the algorithm description says q <--
2158 floor([S+P]/\hat Q), but looking at the equations
2159 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2160 (In this code, \hat Q is Q1). */
2161 div_smallq (q, rem, S + P, Q);
2162 P1 = S - rem; /* P1 = q*Q - P */
2164 # if STAT_SQUFOF
2165 q_freq[0]++;
2166 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2167 # endif
2168 if (P == P1)
2169 break;
2170 t = Q1 + q * (P - P1);
2171 Q1 = Q;
2172 Q = t;
2173 P = P1;
2176 if ((Q & 1) == 0)
2177 Q /= 2;
2178 Q /= gcd_odd (Q, mu);
2180 affirm (Q > 1 && (n1 || Q < n0));
2182 if (prime_p (Q))
2183 factor_insert (factors, Q);
2184 else if (!factor_using_squfof (0, Q, factors))
2185 factor_using_pollard_rho (Q, 2, factors);
2187 divexact_21 (n1, n0, n1, n0, Q);
2189 if (prime2_p (n1, n0))
2190 factor_insert_large (factors, n1, n0);
2191 else
2193 if (!factor_using_squfof (n1, n0, factors))
2195 if (n1 == 0)
2196 factor_using_pollard_rho (n0, 1, factors);
2197 else
2198 factor_using_pollard_rho2 (n1, n0, 1, factors);
2202 return true;
2205 next_i:;
2207 next_multiplier:;
2209 return false;
2211 #endif
2213 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2214 results in FACTORS. */
2215 static void
2216 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2218 factors->nfactors = 0;
2219 factors->plarge[1] = 0;
2221 if (t1 == 0 && t0 < 2)
2222 return;
2224 t0 = factor_using_division (&t1, t1, t0, factors);
2226 if (t1 == 0 && t0 < 2)
2227 return;
2229 if (prime2_p (t1, t0))
2230 factor_insert_large (factors, t1, t0);
2231 else
2233 #if USE_SQUFOF
2234 if (factor_using_squfof (t1, t0, factors))
2235 return;
2236 #endif
2238 if (t1 == 0)
2239 factor_using_pollard_rho (t0, 1, factors);
2240 else
2241 factor_using_pollard_rho2 (t1, t0, 1, factors);
2245 /* Use Pollard-rho to compute the prime factors of
2246 arbitrary-precision T, and put the results in FACTORS. */
2247 static void
2248 mp_factor (mpz_t t, struct mp_factors *factors)
2250 mp_factor_init (factors);
2252 if (mpz_sgn (t) != 0)
2254 mp_factor_using_division (t, factors);
2256 if (mpz_cmp_ui (t, 1) != 0)
2258 devmsg ("[is number prime?] ");
2259 if (mp_prime_p (t))
2260 mp_factor_insert (factors, t);
2261 else
2262 mp_factor_using_pollard_rho (t, 1, factors);
2267 static strtol_error
2268 strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s)
2270 int lo_carry;
2271 uintmax_t hi = 0, lo = 0;
2273 strtol_error err = LONGINT_INVALID;
2275 /* Initial scan for invalid digits. */
2276 char const *p = s;
2277 for (;;)
2279 unsigned char c = *p++;
2280 if (c == 0)
2281 break;
2283 if (UNLIKELY (!ISDIGIT (c)))
2285 err = LONGINT_INVALID;
2286 break;
2289 err = LONGINT_OK; /* we've seen at least one valid digit */
2292 while (err == LONGINT_OK)
2294 unsigned char c = *s++;
2295 if (c == 0)
2296 break;
2298 c -= '0';
2300 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2302 err = LONGINT_OVERFLOW;
2303 break;
2305 hi = 10 * hi;
2307 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2308 lo_carry += 10 * lo < 2 * lo;
2310 lo = 10 * lo;
2311 lo += c;
2313 lo_carry += lo < c;
2314 hi += lo_carry;
2315 if (UNLIKELY (hi < lo_carry))
2317 err = LONGINT_OVERFLOW;
2318 break;
2322 *hip = hi;
2323 *lop = lo;
2325 return err;
2328 /* Structure and routines for buffering and outputting full lines,
2329 to support parallel operation efficiently. */
2330 static struct lbuf_
2332 char *buf;
2333 char *end;
2334 } lbuf;
2336 /* 512 is chosen to give good performance,
2337 and also is the max guaranteed size that
2338 consumers can read atomically through pipes.
2339 Also it's big enough to cater for max line length
2340 even with 128 bit uintmax_t. */
2341 #define FACTOR_PIPE_BUF 512
2343 static void
2344 lbuf_alloc (void)
2346 if (lbuf.buf)
2347 return;
2349 /* Double to ensure enough space for
2350 previous numbers + next number. */
2351 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2352 lbuf.end = lbuf.buf;
2355 /* Write complete LBUF to standard output. */
2356 static void
2357 lbuf_flush (void)
2359 size_t size = lbuf.end - lbuf.buf;
2360 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2361 write_error ();
2362 lbuf.end = lbuf.buf;
2365 /* Add a character C to LBUF and if it's a newline
2366 and enough bytes are already buffered,
2367 then write atomically to standard output. */
2368 static void
2369 lbuf_putc (char c)
2371 *lbuf.end++ = c;
2373 if (c == '\n')
2375 size_t buffered = lbuf.end - lbuf.buf;
2377 /* Provide immediate output for interactive use. */
2378 static int line_buffered = -1;
2379 if (line_buffered == -1)
2380 line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2381 if (line_buffered)
2382 lbuf_flush ();
2383 else if (buffered >= FACTOR_PIPE_BUF)
2385 /* Write output in <= PIPE_BUF chunks
2386 so consumers can read atomically. */
2387 char const *tend = lbuf.end;
2389 /* Since a umaxint_t's factors must fit in 512
2390 we're guaranteed to find a newline here. */
2391 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2392 while (*--tlend != '\n');
2393 tlend++;
2395 lbuf.end = tlend;
2396 lbuf_flush ();
2398 /* Buffer the remainder. */
2399 memcpy (lbuf.buf, tlend, tend - tlend);
2400 lbuf.end = lbuf.buf + (tend - tlend);
2405 /* Buffer an int to the internal LBUF. */
2406 static void
2407 lbuf_putint (uintmax_t i, size_t min_width)
2409 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2410 char const *umaxstr = umaxtostr (i, buf);
2411 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2412 size_t z = width;
2414 for (; z < min_width; z++)
2415 *lbuf.end++ = '0';
2417 memcpy (lbuf.end, umaxstr, width);
2418 lbuf.end += width;
2421 static void
2422 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2424 uintmax_t q, r;
2426 if (t1 == 0)
2427 lbuf_putint (t0, 0);
2428 else
2430 /* Use very plain code here since it seems hard to write fast code
2431 without assuming a specific word size. */
2432 q = t1 / 1000000000;
2433 r = t1 % 1000000000;
2434 udiv_qrnnd (t0, r, r, t0, 1000000000);
2435 print_uintmaxes (q, t0);
2436 lbuf_putint (r, 9);
2440 /* Single-precision factoring */
2441 static void
2442 print_factors_single (uintmax_t t1, uintmax_t t0)
2444 struct factors factors;
2446 print_uintmaxes (t1, t0);
2447 lbuf_putc (':');
2449 factor (t1, t0, &factors);
2451 for (int j = 0; j < factors.nfactors; j++)
2452 for (int k = 0; k < factors.e[j]; k++)
2454 lbuf_putc (' ');
2455 print_uintmaxes (0, factors.p[j]);
2456 if (print_exponents && factors.e[j] > 1)
2458 lbuf_putc ('^');
2459 lbuf_putint (factors.e[j], 0);
2460 break;
2464 if (factors.plarge[1])
2466 lbuf_putc (' ');
2467 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2470 lbuf_putc ('\n');
2473 /* Emit the factors of the indicated number. If we have the option of using
2474 either algorithm, we select on the basis of the length of the number.
2475 For longer numbers, we prefer the MP algorithm even if the native algorithm
2476 has enough digits, because the algorithm is better. The turnover point
2477 depends on the value. */
2478 static bool
2479 print_factors (char const *input)
2481 /* Skip initial spaces and '+'. */
2482 char const *str = input;
2483 while (*str == ' ')
2484 str++;
2485 str += *str == '+';
2487 uintmax_t t1, t0;
2489 /* Try converting the number to one or two words. If it fails, use GMP or
2490 print an error message. The 2nd condition checks that the most
2491 significant bit of the two-word number is clear, in a typesize neutral
2492 way. */
2493 strtol_error err = strto2uintmax (&t1, &t0, str);
2495 switch (err)
2497 case LONGINT_OK:
2498 if (((t1 << 1) >> 1) == t1)
2500 devmsg ("[using single-precision arithmetic] ");
2501 print_factors_single (t1, t0);
2502 return true;
2504 break;
2506 case LONGINT_OVERFLOW:
2507 /* Try GMP. */
2508 break;
2510 default:
2511 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2512 return false;
2515 devmsg ("[using arbitrary-precision arithmetic] ");
2516 mpz_t t;
2517 struct mp_factors factors;
2519 mpz_init_set_str (t, str, 10);
2521 mpz_out_str (stdout, 10, t);
2522 putchar (':');
2523 mp_factor (t, &factors);
2525 for (idx_t j = 0; j < factors.nfactors; j++)
2526 for (unsigned long int k = 0; k < factors.e[j]; k++)
2528 putchar (' ');
2529 mpz_out_str (stdout, 10, factors.p[j]);
2530 if (print_exponents && factors.e[j] > 1)
2532 printf ("^%lu", factors.e[j]);
2533 break;
2537 mp_factor_clear (&factors);
2538 mpz_clear (t);
2539 putchar ('\n');
2540 fflush (stdout);
2541 return true;
2544 void
2545 usage (int status)
2547 if (status != EXIT_SUCCESS)
2548 emit_try_help ();
2549 else
2551 printf (_("\
2552 Usage: %s [OPTION] [NUMBER]...\n\
2554 program_name);
2555 fputs (_("\
2556 Print the prime factors of each specified integer NUMBER. If none\n\
2557 are specified on the command line, read them from standard input.\n\
2559 "), stdout);
2560 fputs ("\
2561 -h, --exponents print repeated factors in form p^e unless e is 1\n\
2562 ", stdout);
2563 fputs (HELP_OPTION_DESCRIPTION, stdout);
2564 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2565 emit_ancillary_info (PROGRAM_NAME);
2567 exit (status);
2570 static bool
2571 do_stdin (void)
2573 bool ok = true;
2574 token_buffer tokenbuffer;
2576 init_tokenbuffer (&tokenbuffer);
2578 while (true)
2580 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2581 &tokenbuffer);
2582 if (token_length == (size_t) -1)
2584 if (ferror (stdin))
2585 error (EXIT_FAILURE, errno, _("error reading input"));
2586 break;
2589 ok &= print_factors (tokenbuffer.buffer);
2591 free (tokenbuffer.buffer);
2593 return ok;
2597 main (int argc, char **argv)
2599 initialize_main (&argc, &argv);
2600 set_program_name (argv[0]);
2601 setlocale (LC_ALL, "");
2602 bindtextdomain (PACKAGE, LOCALEDIR);
2603 textdomain (PACKAGE);
2605 lbuf_alloc ();
2606 atexit (close_stdout);
2607 atexit (lbuf_flush);
2609 int c;
2610 while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1)
2612 switch (c)
2614 case 'h': /* NetBSD used -h for this functionality first. */
2615 print_exponents = true;
2616 break;
2618 case DEV_DEBUG_OPTION:
2619 dev_debug = true;
2620 break;
2622 case_GETOPT_HELP_CHAR;
2624 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2626 default:
2627 usage (EXIT_FAILURE);
2631 #if STAT_SQUFOF
2632 memset (q_freq, 0, sizeof (q_freq));
2633 #endif
2635 bool ok;
2636 if (argc <= optind)
2637 ok = do_stdin ();
2638 else
2640 ok = true;
2641 for (int i = optind; i < argc; i++)
2642 if (! print_factors (argv[i]))
2643 ok = false;
2646 #if STAT_SQUFOF
2647 if (q_freq[0] > 0)
2649 double acc_f;
2650 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2651 for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2653 double f = (double) q_freq[i] / q_freq[0];
2654 acc_f += f;
2655 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2656 100.0 * f, 100.0 * acc_f);
2659 #endif
2661 return ok ? EXIT_SUCCESS : EXIT_FAILURE;