beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpz / bin_uiui.c
blob94a9dc5c75d8234e81a02ea903f0b8e681815a37
1 /* mpz_bin_uiui - compute n over k.
3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
5 Copyright 2010-2012 Free Software Foundation, Inc.
7 This file is part of the GNU MP Library.
9 The GNU MP Library is free software; you can redistribute it and/or modify
10 it under the terms of either:
12 * the GNU Lesser General Public License as published by the Free
13 Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
18 * the GNU General Public License as published by the Free Software
19 Foundation; either version 2 of the License, or (at your option) any
20 later version.
22 or both in parallel, as here.
24 The GNU MP Library is distributed in the hope that it will be useful, but
25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 for more details.
29 You should have received copies of the GNU General Public License and the
30 GNU Lesser General Public License along with the GNU MP Library. If not,
31 see https://www.gnu.org/licenses/. */
33 #include "gmp.h"
34 #include "gmp-impl.h"
35 #include "longlong.h"
37 #ifndef BIN_GOETGHELUCK_THRESHOLD
38 #define BIN_GOETGHELUCK_THRESHOLD 1000
39 #endif
40 #ifndef BIN_UIUI_ENABLE_SMALLDC
41 #define BIN_UIUI_ENABLE_SMALLDC 1
42 #endif
43 #ifndef BIN_UIUI_RECURSIVE_SMALLDC
44 #define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
45 #endif
47 /* Algorithm:
49 Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
50 which are then accumulated into mpn numbers. The first inner loop
51 accumulates divisor factors, the 2nd inner loop accumulates exactly the same
52 number of dividend factors. We avoid accumulating more for the divisor,
53 even with its smaller factors, since we else cannot guarantee divisibility.
55 Since we know each division will yield an integer, we compute the quotient
56 using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
57 2^t.
59 Improvements:
61 (1) An obvious improvement to this code would be to compute mod 2^t
62 everywhere. Unfortunately, we cannot determine t beforehand, unless we
63 invoke some approximation, such as Stirling's formula. Of course, we don't
64 need t to be tight. However, it is not clear that this would help much,
65 our numbers are kept reasonably small already.
67 (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
68 Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
69 would make it both reasonably accurate and fast. (We could use a table
70 stored into a limb, perhaps.) The table should take the removed factors of
71 2 into account (those done on-the-fly in mulN).
73 (3) The first time in the loop we compute the odd part of a
74 factorial in kp, we might use oddfac_1 for this task.
77 /* This threshold determines how large divisor to accumulate before we call
78 bdiv. Perhaps we should never call bdiv, and accumulate all we are told,
79 since we are just basecase code anyway? Presumably, this depends on the
80 relative speed of the asymptotically fast code and this code. */
81 #define SOME_THRESHOLD 20
83 /* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME:
84 All versions of MAXFACS don't take this 2 removal into account now, meaning
85 that then, shifting just adds some overhead. (We remove factors from the
86 completed limb anyway.) */
88 static mp_limb_t
89 mul1 (mp_limb_t m)
91 return m;
94 static mp_limb_t
95 mul2 (mp_limb_t m)
97 /* We need to shift before multiplying, to avoid an overflow. */
98 mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
99 return m01;
102 static mp_limb_t
103 mul3 (mp_limb_t m)
105 mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
106 mp_limb_t m2 = (m + 2);
107 return m01 * m2;
110 static mp_limb_t
111 mul4 (mp_limb_t m)
113 mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
114 mp_limb_t m23 = (m + 2) * (m + 3) >> 1;
115 return m01 * m23;
118 static mp_limb_t
119 mul5 (mp_limb_t m)
121 mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1;
122 mp_limb_t m34 = (m + 3) * (m + 4) >> 1;
123 return m012 * m34;
126 static mp_limb_t
127 mul6 (mp_limb_t m)
129 mp_limb_t m01 = (m + 0) * (m + 1);
130 mp_limb_t m23 = (m + 2) * (m + 3);
131 mp_limb_t m45 = (m + 4) * (m + 5) >> 1;
132 mp_limb_t m0123 = m01 * m23 >> 3;
133 return m0123 * m45;
136 static mp_limb_t
137 mul7 (mp_limb_t m)
139 mp_limb_t m01 = (m + 0) * (m + 1);
140 mp_limb_t m23 = (m + 2) * (m + 3);
141 mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1;
142 mp_limb_t m0123 = m01 * m23 >> 3;
143 return m0123 * m456;
146 static mp_limb_t
147 mul8 (mp_limb_t m)
149 mp_limb_t m01 = (m + 0) * (m + 1);
150 mp_limb_t m23 = (m + 2) * (m + 3);
151 mp_limb_t m45 = (m + 4) * (m + 5);
152 mp_limb_t m67 = (m + 6) * (m + 7);
153 mp_limb_t m0123 = m01 * m23 >> 3;
154 mp_limb_t m4567 = m45 * m67 >> 3;
155 return m0123 * m4567;
158 typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
160 static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8};
161 #define M (numberof(mulfunc))
163 /* Number of factors-of-2 removed by the corresponding mulN function. */
164 static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6};
166 #if 1
167 /* This variant is inaccurate but share the code with other functions. */
168 #define MAXFACS(max,l) \
169 do { \
170 (max) = log_n_max (l); \
171 } while (0)
172 #else
174 /* This variant is exact(?) but uses a loop. It takes the 2 removal
175 of mulN into account. */
176 static const unsigned long ftab[] =
177 #if GMP_NUMB_BITS == 64
178 /* 1 to 8 factors per iteration */
179 {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */};
180 #endif
181 #if GMP_NUMB_BITS == 32
182 /* 1 to 7 factors per iteration */
183 {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */};
184 #endif
186 #define MAXFACS(max,l) \
187 do { \
188 int __i; \
189 for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \
191 (max) = __i + 1; \
192 } while (0)
193 #endif
195 /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
196 is an odd integer. */
197 static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
199 static void
200 mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
202 int nmax, kmax, nmaxnow, numfac;
203 mp_ptr np, kp;
204 mp_size_t nn, kn, alloc;
205 mp_limb_t i, j, t, iii, jjj, cy, dinv;
206 mp_bitcnt_t i2cnt, j2cnt;
207 int cnt;
208 mp_size_t maxn;
209 TMP_DECL;
211 ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
212 TMP_MARK;
214 maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */
216 /* FIXME: This allocation might be insufficient, but is usually way too
217 large. */
218 alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
219 alloc = MIN (alloc, k) + 1;
220 np = TMP_ALLOC_LIMBS (alloc);
221 kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1);
223 MAXFACS (nmax, n);
224 ASSERT (nmax <= M);
225 MAXFACS (kmax, k);
226 ASSERT (kmax <= M);
227 ASSERT (k >= M);
229 i = n - k + 1;
231 np[0] = 1; nn = 1;
233 i2cnt = 0; /* total low zeros in dividend */
234 j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1];
235 /* total low zeros in divisor */
237 numfac = 1;
238 j = ODD_FACTORIAL_TABLE_LIMIT + 1;
239 jjj = ODD_FACTORIAL_TABLE_MAX;
240 ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
242 while (1)
244 kp[0] = jjj; /* store new factors */
245 kn = 1;
246 t = k - j + 1;
247 kmax = MIN (kmax, t);
249 while (kmax != 0 && kn < SOME_THRESHOLD)
251 jjj = mulfunc[kmax - 1] (j);
252 j += kmax; /* number of factors used */
253 count_trailing_zeros (cnt, jjj); /* count low zeros */
254 jjj >>= cnt; /* remove remaining low zeros */
255 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */
256 cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */
257 kp[kn] = cy;
258 kn += cy != 0;
259 t = k - j + 1;
260 kmax = MIN (kmax, t);
262 numfac = j - numfac;
264 while (numfac != 0)
266 nmaxnow = MIN (nmax, numfac);
267 iii = mulfunc[nmaxnow - 1] (i);
268 i += nmaxnow; /* number of factors used */
269 count_trailing_zeros (cnt, iii); /* count low zeros */
270 iii >>= cnt; /* remove remaining low zeros */
271 i2cnt += tcnttab[nmaxnow - 1] + cnt; /* update low zeros count */
272 cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */
273 np[nn] = cy;
274 nn += cy != 0;
275 numfac -= nmaxnow;
278 ASSERT (nn < alloc);
280 binvert_limb (dinv, kp[0]);
281 nn += (np[nn - 1] >= kp[kn - 1]);
282 nn -= kn;
283 mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
285 if (kmax == 0)
286 break;
287 numfac = j;
289 jjj = mulfunc[kmax - 1] (j);
290 j += kmax; /* number of factors used */
291 count_trailing_zeros (cnt, jjj); /* count low zeros */
292 jjj >>= cnt; /* remove remaining low zeros */
293 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */
296 /* Put back the right number of factors of 2. */
297 cnt = i2cnt - j2cnt;
298 if (cnt != 0)
300 ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
301 cy = mpn_lshift (np, np, nn, cnt);
302 np[nn] = cy;
303 nn += cy != 0;
306 nn -= np[nn - 1] == 0; /* normalisation */
308 kp = MPZ_NEWALLOC (r, nn);
309 SIZ(r) = nn;
310 MPN_COPY (kp, np, nn);
311 TMP_FREE;
314 static void
315 mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
317 int nmax, numfac;
318 mp_ptr rp;
319 mp_size_t rn, alloc;
320 mp_limb_t i, iii, cy;
321 mp_bitcnt_t i2cnt, cnt;
323 count_leading_zeros (cnt, (mp_limb_t) n);
324 cnt = GMP_LIMB_BITS - cnt;
325 alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */
326 rp = MPZ_NEWALLOC (r, alloc);
328 MAXFACS (nmax, n);
329 nmax = MIN (nmax, M);
331 i = n - k + 1;
333 nmax = MIN (nmax, k);
334 rp[0] = mulfunc[nmax - 1] (i);
335 rn = 1;
336 i += nmax; /* number of factors used */
337 i2cnt = tcnttab[nmax - 1]; /* low zeros count */
338 numfac = k - nmax;
339 while (numfac != 0)
341 nmax = MIN (nmax, numfac);
342 iii = mulfunc[nmax - 1] (i);
343 i += nmax; /* number of factors used */
344 i2cnt += tcnttab[nmax - 1]; /* update low zeros count */
345 cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */
346 rp[rn] = cy;
347 rn += cy != 0;
348 numfac -= nmax;
351 ASSERT (rn < alloc);
353 mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2],
354 __gmp_fac2cnt_table[k / 2 - 1] - i2cnt);
355 /* A two-fold, branch-free normalisation is possible :*/
356 /* rn -= rp[rn - 1] == 0; */
357 /* rn -= rp[rn - 1] == 0; */
358 MPN_NORMALIZE_NOT_ZERO (rp, rn);
360 SIZ(r) = rn;
363 /* Algorithm:
365 Plain and simply multiply things together.
367 We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
368 that k!/2^t is odd).
372 static mp_limb_t
373 bc_bin_uiui (unsigned int n, unsigned int k)
375 return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
376 << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
377 & GMP_NUMB_MASK;
380 /* Algorithm:
382 Recursively exploit the relation
383 bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) .
385 Values for binomial(k,k>>1) that fit in a limb are precomputed
386 (with inverses).
389 /* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
390 binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
391 static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
393 /* bin2kkinv[i] = bin2kk[i]^-1 mod B */
394 static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
396 /* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */
397 static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
399 static void
400 mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
402 mp_ptr rp;
403 mp_size_t rn;
404 unsigned long int hk;
406 hk = k >> 1;
408 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
409 mpz_smallk_bin_uiui (r, n, hk);
410 else
411 mpz_smallkdc_bin_uiui (r, n, hk);
412 k -= hk;
413 n -= hk;
414 if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
415 mp_limb_t cy;
416 rn = SIZ (r);
417 rp = MPZ_REALLOC (r, rn + 1);
418 cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
419 rp [rn] = cy;
420 rn += cy != 0;
421 } else {
422 mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
423 mpz_t t;
425 ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
426 PTR (t) = buffer;
427 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
428 mpz_smallk_bin_uiui (t, n, k);
429 else
430 mpz_smallkdc_bin_uiui (t, n, k);
431 mpz_mul (r, r, t);
432 rp = PTR (r);
433 rn = SIZ (r);
436 mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
437 bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
438 fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
439 /* A two-fold, branch-free normalisation is possible :*/
440 /* rn -= rp[rn - 1] == 0; */
441 /* rn -= rp[rn - 1] == 0; */
442 MPN_NORMALIZE_NOT_ZERO (rp, rn);
444 SIZ(r) = rn;
447 /* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
449 * Contributed to the GNU project by Marco Bodrato.
451 * Implementation of the algorithm by P. Goetgheluck, "Computing
452 * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
453 * No. 4 (April 1987), pp. 360-365.
455 * Acknowledgment: Peter Luschny did spot the slowness of the previous
456 * code and suggested the reference.
459 /* TODO: Remove duplicated constants / macros / static functions...
462 /*************************************************************/
463 /* Section macros: common macros, for swing/fac/bin (&sieve) */
464 /*************************************************************/
466 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
467 if ((PR) > (MAX_PR)) { \
468 (VEC)[(I)++] = (PR); \
469 (PR) = 1; \
472 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
473 do { \
474 if ((PR) > (MAX_PR)) { \
475 (VEC)[(I)++] = (PR); \
476 (PR) = (P); \
477 } else \
478 (PR) *= (P); \
479 } while (0)
481 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
482 __max_i = (end); \
484 do { \
485 ++__i; \
486 if (((sieve)[__index] & __mask) == 0) \
488 (prime) = id_to_n(__i)
490 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
491 do { \
492 mp_limb_t __mask, __index, __max_i, __i; \
494 __i = (start)-(off); \
495 __index = __i / GMP_LIMB_BITS; \
496 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
497 __i += (off); \
499 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
501 #define LOOP_ON_SIEVE_STOP \
503 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
504 __index += __mask & 1; \
505 } while (__i <= __max_i) \
507 #define LOOP_ON_SIEVE_END \
508 LOOP_ON_SIEVE_STOP; \
509 } while (0)
511 /*********************************************************/
512 /* Section sieve: sieving functions and tools for primes */
513 /*********************************************************/
515 #if WANT_ASSERT
516 static mp_limb_t
517 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
518 #endif
520 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
521 static mp_limb_t
522 id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
524 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
525 static mp_limb_t
526 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
528 static mp_size_t
529 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
531 /*********************************************************/
532 /* Section binomial: fast binomial implementation */
533 /*********************************************************/
535 #define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
536 do { \
537 mp_limb_t __a, __b, __prime, __ma,__mb; \
538 __prime = (P); \
539 __a = (N); __b = (K); __mb = 0; \
540 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
541 do { \
542 __mb += __b % __prime; __b /= __prime; \
543 __ma = __a % __prime; __a /= __prime; \
544 if (__ma < __mb) { \
545 __mb = 1; (PR) *= __prime; \
546 } else __mb = 0; \
547 } while (__a >= __prime); \
548 } while (0)
550 #define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
551 do { \
552 mp_limb_t __prime; \
553 __prime = (P); \
554 if (((N) % __prime) < ((K) % __prime)) { \
555 FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \
557 } while (0)
559 /* Returns an approximation of the sqare root of x. *
560 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
561 static mp_limb_t
562 limb_apprsqrt (mp_limb_t x)
564 int s;
566 ASSERT (x > 2);
567 count_leading_zeros (s, x - 1);
568 s = GMP_LIMB_BITS - 1 - s;
569 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
572 static void
573 mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
575 mp_limb_t *sieve, *factors, count;
576 mp_limb_t prod, max_prod, j;
577 TMP_DECL;
579 ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
580 ASSERT (n >= 25);
582 TMP_MARK;
583 sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
585 count = gmp_primesieve (sieve, n) + 1;
586 factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
588 max_prod = GMP_NUMB_MAX / n;
590 /* Handle primes = 2, 3 separately. */
591 popc_limb (count, n - k);
592 popc_limb (j, k);
593 count += j;
594 popc_limb (j, n);
595 count -= j;
596 prod = CNST_LIMB(1) << count;
598 j = 0;
599 COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
601 /* Accumulate prime factors from 5 to n/2 */
603 mp_limb_t s;
606 mp_limb_t prime;
607 s = limb_apprsqrt(n);
608 s = n_to_bit (s);
609 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
610 COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
611 LOOP_ON_SIEVE_END;
612 s++;
615 ASSERT (max_prod <= GMP_NUMB_MAX / 2);
616 max_prod <<= 1;
617 ASSERT (bit_to_n (s) * bit_to_n (s) > n);
618 ASSERT (s <= n_to_bit (n >> 1));
620 mp_limb_t prime;
622 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve);
623 SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
624 LOOP_ON_SIEVE_END;
626 max_prod >>= 1;
629 /* Store primes from (n-k)+1 to n */
630 ASSERT (n_to_bit (n - k) < n_to_bit (n));
632 mp_limb_t prime;
633 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
634 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
635 LOOP_ON_SIEVE_END;
638 if (LIKELY (j != 0))
640 factors[j++] = prod;
641 mpz_prodlimbs (r, factors, j);
643 else
645 PTR (r)[0] = prod;
646 SIZ (r) = 1;
648 TMP_FREE;
651 #undef COUNT_A_PRIME
652 #undef SH_COUNT_A_PRIME
653 #undef LOOP_ON_SIEVE_END
654 #undef LOOP_ON_SIEVE_STOP
655 #undef LOOP_ON_SIEVE_BEGIN
656 #undef LOOP_ON_SIEVE_CONTINUE
658 /*********************************************************/
659 /* End of implementation of Goetgheluck's algorithm */
660 /*********************************************************/
662 void
663 mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
665 if (UNLIKELY (n < k)) {
666 SIZ (r) = 0;
667 #if BITS_PER_ULONG > GMP_NUMB_BITS
668 } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
669 mpz_t tmp;
671 mpz_init_set_ui (tmp, n);
672 mpz_bin_ui (r, tmp, k);
673 mpz_clear (tmp);
674 #endif
675 } else {
676 ASSERT (n <= GMP_NUMB_MAX);
677 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
678 k = MIN (k, n - k);
679 if (k < 2) {
680 PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
681 SIZ(r) = 1;
682 } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
683 PTR(r)[0] = bc_bin_uiui (n, k);
684 SIZ(r) = 1;
685 } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
686 mpz_smallk_bin_uiui (r, n, k);
687 else if (BIN_UIUI_ENABLE_SMALLDC &&
688 k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
689 mpz_smallkdc_bin_uiui (r, n, k);
690 else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
691 k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
692 mpz_goetgheluck_bin_uiui (r, n, k);
693 else
694 mpz_bdiv_bin_uiui (r, n, k);