1 /* mpz_fac_ui(result, n) -- Set RESULT to N!.
3 Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* Forward declarations for the internal helpers defined further down.
   odd_product (low, high, st) leaves its result in st[0] (see the
   comment block above its definition).  */
28 static void odd_product
__GMP_PROTO ((unsigned long, unsigned long, mpz_t
*));
/* ap_product_small (ret, start, step, count, nm): product of an
   arithmetic progression start, start+step, ..., multiplying nm
   terms at a time (nm in 1..4, see the switch in its body).  */
29 static void ap_product_small
__GMP_PROTO ((mpz_t
, mp_limb_t
, mp_limb_t
, unsigned long, unsigned long));
/* MPZ_SET_1_NZ(z,n): store the single non-zero limb n into mpz z.
   NOTE(review): the macro body between original lines 36 and 44 is
   missing from this extract.  */
35 /* for single non-zero limb */
36 #define MPZ_SET_1_NZ(z,n) \
/* MPZ_MUL_1_POS(dst,src,n): dst = src * n for src > 0 and n > 0.
   Multiplies the limbs of src by the single limb n with mpn_mul_1,
   appends the carry limb, and sets the size accordingly (the (__c != 0)
   term adds one to the size only when a carry limb was produced).
   NOTE(review): the do/while wrapper and the declarations of __dst_p
   and __c are among the lines missing from this extract.  */
44 /* for src>0 and n>0 */
45 #define MPZ_MUL_1_POS(dst,src,n) \
47 mpz_ptr __dst = (dst); \
48 mpz_srcptr __src = (src); \
49 mp_size_t __size = SIZ(__src); \
53 ASSERT (__size > 0); \
56 MPZ_REALLOC (__dst, __size+1); \
57 __dst_p = PTR(__dst); \
59 __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n); \
60 __dst_p[__size] = __c; \
61 SIZ(__dst) = __size + (__c != 0); \
/* BSWAP_ULONG(dst,src): byte-reverse an unsigned long.  When ulong and
   limb are the same width the limb primitive is used directly;
   otherwise fall back to the generic byte loop below.  */
65 #if BITS_PER_ULONG == GMP_LIMB_BITS
66 #define BSWAP_ULONG(x,y) BSWAP_LIMB(x,y)
69 /* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
70 by a shift down to get the high part. But it provoked incorrect code
71 from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode. This
72 case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
73 can get that directly muxing a 4-byte ulong if it matters enough. */
/* Generic fallback: peel one byte per iteration, pushing previously
   collected bytes up by 8 bits, so the byte order ends up reversed.
   NOTE(review): the declaration of __i and the shift of __bswapl_src
   are among the lines missing from this extract.  */
75 #if ! defined (BSWAP_ULONG)
76 #define BSWAP_ULONG(dst, src) \
78 unsigned long __bswapl_src = (src); \
79 unsigned long __bswapl_dst = 0; \
81 for (__i = 0; __i < sizeof(unsigned long); __i++) \
83 __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF); \
86 (dst) = __bswapl_dst; \
/* BITREV_ULONG(x,y): set x to the bit-reversal of y.  First the bytes
   are reversed with BSWAP_ULONG, then within each byte: nibbles are
   swapped (shift by 4), then bit pairs (shift by 2), then single bits
   (shift by 1).  The ULONG_MAX/17, /5, /3 divisions produce the
   0x0F0F..., 0x3333..., 0x5555... masks for any word size; they are
   all exact, as noted below.  */
90 /* x is bit reverse of y */
91 /* Note the divides below are all exact */
92 #define BITREV_ULONG(x,y) \
94 unsigned long __dst; \
95 BSWAP_ULONG(__dst,y); \
96 __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
97 __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4) ); \
98 __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2) ); \
101 /* above could be improved if cpu has a nibble/bit swap/muxing instruction */
102 /* above code is serialized, possible to write as a big parallel expression */
/* mpz_fac_ui (x, n): set x = n!.
   Strategy visible in this extract:
     - n below the table size: single-limb table lookup.
     - small n (up to 2^APCONST, 2^(APCONST+1), 2^(APCONST+2)): direct
       arithmetic-progression products, splitting out odd parts and
       powers of two.
     - large n: repeated-squaring over odd products
       n! = 2^e * PRODUCT C_2(n/2^(i+1), n/2^i)^(i+1), with the final
       power of two obtained from the popcount identity e = n - popcount(n).
   NOTE(review): this extract is missing lines from the original file
   (declarations of d[], i, j, braces and returns); comments below
   annotate only what is visible.  */
107 mpz_fac_ui (mpz_ptr x
, unsigned long n
)
/* z is reused: first as a bit-count estimate, later as log2(n/3);
   stt is the number of stack entries st[] used in the large-n case.  */
109 unsigned long z
, stt
;
/* st[] is the stack of partial odd products; its depth bounds the
   number of recursion levels: one entry per bit of n above APCONST.  */
111 mpz_t t1
, st
[8 * sizeof (unsigned long) + 1 - APCONST
];
/* Factorials that fit in one limb, indexed by n.  */
114 static const mp_limb_t table
[] = { ONE_LIMB_FACTORIAL_TABLE
};
/* Fast path: n! fits in a single limb.  */
116 if (n
< numberof (table
))
118 MPZ_SET_1_NZ (x
, table
[n
]);
122 /* NOTE : MUST have n>=3 here */
124 /* for estimating the alloc sizes the calculation of these formula's is not
125 exact and also the formulas are only approximations, also we ignore
126 the few "side" calculations, correct allocation seems to speed up the
127 small sizes better, having very little effect on the large sizes */
129 /* estimate space for stack entries see below
130 number of bits for n! is
131 (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
132 2.325748065-n*1.442695041+(n+0.5)*log_2(n) */
/* High limb of n * FAC2OVERE approximates 2n/e (Stirling).  */
133 umul_ppmm (d
[1], d
[0], (mp_limb_t
) n
, (mp_limb_t
) FAC2OVERE
);
134 /* d[1] is 2n/e, d[0] ignored */
135 count_leading_zeros (z
, d
[1]);
136 z
= GMP_LIMB_BITS
- z
- 1; /* z=floor(log_2(2n/e)) */
137 umul_ppmm (d
[1], d
[0], (mp_limb_t
) n
, (mp_limb_t
) z
);
138 /* d=n*floor(log_2(2n/e)) */
/* Divide the double-limb value d by 4, keeping the low limb.  */
139 d
[0] = (d
[0] >> 2) | (d
[1] << (GMP_LIMB_BITS
- 2));
141 /* d=n*floor(log_2(2n/e))/4 */
142 z
= d
[0] + 1; /* have to ignore any overflow */
143 /* so z is the number of bits wanted for st[0] */
/* Smallest non-table case: one direct product 2*3*...*n.  */
146 if (n
<= ((unsigned long) 1) << (APCONST
))
148 mpz_realloc2 (x
, 4 * z
);
149 ap_product_small (x
, CNST_LIMB(2), CNST_LIMB(1), n
- 1, 4L);
/* Next size: split into even part (n/2)! (shifted) and odd part.  */
152 if (n
<= ((unsigned long) 1) << (APCONST
+ 1))
153 { /* use n!=odd(1,n)*(n/2)!*2^(n/2) */
154 mpz_init2 (t1
, 2 * z
);
155 mpz_realloc2 (x
, 4 * z
);
/* x = (n/2)! via product of 2..n/2; t1 = product of odd 3,5,...  */
156 ap_product_small (x
, CNST_LIMB(2), CNST_LIMB(1), n
/ 2 - 1, 4L);
157 ap_product_small (t1
, CNST_LIMB(3), CNST_LIMB(2), (n
- 1) / 2, 4L);
/* Restore the n/2 factors of two stripped from the even part.  */
160 mpz_mul_2exp (x
, x
, n
/ 2);
163 if (n
<= ((unsigned long) 1) << (APCONST
+ 2))
165 /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
166 so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
167 mpz_init2 (t1
, 2 * z
);
168 mpz_realloc2 (x
, 4 * z
);
/* Initialize the four stack entries needed at this size.  */
169 for (i
= 0; i
< 4; i
++)
171 mpz_init2 (st
[i
], z
);
/* Odd products over (1, n/2] and (n/2, n]; results land in st[].  */
174 odd_product (1, n
/ 2, st
);
176 odd_product (n
/ 2, n
, st
);
178 ASSERT (n
/ 4 <= FACMUL4
+ 6);
/* t1 = (n/4)! by direct product.  */
179 ap_product_small (t1
, CNST_LIMB(2), CNST_LIMB(1), n
/ 4 - 1, 4L);
180 /* must have 2^APCONST odd numbers max */
181 mpz_mul (t1
, t1
, st
[0]);
/* Release the temporary stack entries.  */
182 for (i
= 0; i
< 4; i
++)
/* Apply the accumulated power of two, 2^(n/2+n/4).  */
186 mpz_mul_2exp (x
, x
, n
/ 2 + n
/ 4);
/* General case: stt = bits in n above APCONST, plus one -- the number
   of st[] stack entries this algorithm needs.  */
190 count_leading_zeros (stt
, (mp_limb_t
) n
);
191 stt
= GMP_LIMB_BITS
- stt
+ 1 - APCONST
;
193 for (i
= 0; i
< (signed long) stt
; i
++)
195 mpz_init2 (st
[i
], z
);
/* z = number of bits of n/3, i.e. smallest z with 3*2^z > n.  */
199 count_leading_zeros (z
, (mp_limb_t
) (n
/ 3));
200 /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
201 z
= GMP_LIMB_BITS
- z
;
204 n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
205 where 2^e || n! 3.2^z>n C_2(a,b)=PRODUCT of odd z such that a<z<=b
209 mpz_init_set_ui (t1
, 1);
/* Binary-powering loop: j halves each pass, i walks the exponents;
   each C_2 block is raised to i/j and folded into x.  */
210 for (j
= 8 * sizeof (unsigned long) / 2; j
!= 0; j
>>= 1)
213 for (i
= 8 * sizeof (unsigned long) - j
; i
>= j
; i
-= 2 * j
)
214 if ((signed long) z
>= i
)
216 odd_product (n
>> i
, n
>> (i
- 1), st
);
217 /* largest odd product when j=i=1 then we have
218 odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
219 so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
220 number of bits is n*log_base2(2n/e)/4+1 */
222 mpz_pow_ui (st
[0], st
[0], i
/ j
);
223 mpz_mul (x
, x
, st
[0]);
225 if ((signed long) z
>= j
&& j
!= 1)
/* Square the running accumulator each halving of j.  */
228 mpz_mul (t1
, t1
, t1
);
/* Clear the stack entries used above.  */
231 for (i
= 0; i
< (signed long) stt
; i
++)
/* e = n - popcount(n) is the exact power of 2 dividing n!
   (Legendre's formula); apply it as a final shift.  */
235 popc_limb (i
, (mp_limb_t
) n
);
236 mpz_mul_2exp (x
, x
, n
- i
);
/* ap_product_small (ret, start, step, count, nm): set ret to the
   product of the `count`-term arithmetic progression
   start, start+step, start+2*step, ...
   nm selects how many terms are combined per single-limb multiply
   (the switch cases visible below: 1, 2, 3, default 4); combining
   terms keeps each multiplier within one limb while halving/quartering
   the number of bignum multiplies.
   NOTE(review): this extract is missing lines from the original
   (declarations of a and b, the switch header, case labels and braces);
   comments annotate only the visible statements.  */
240 /* start,step are mp_limb_t although they will fit in unsigned long */
242 ap_product_small (mpz_t ret
, mp_limb_t start
, mp_limb_t step
,
243 unsigned long count
, unsigned long nm
)
/* Caller guarantees the term count fits the small-product limit.  */
248 ASSERT (count
<= (((unsigned long) 1) << APCONST
));
249 /* count can never be zero ? check this and remove test below */
/* Empty product: ret = 1.  */
252 MPZ_SET_1_NZ (ret
, 1);
/* Single term: ret = start.  */
257 MPZ_SET_1_NZ (ret
, start
);
/* nm == 1: seed with start, then multiply remaining terms one at
   a time.  */
263 MPZ_SET_1_NZ (ret
, start
);
265 for (a
= 0; a
< count
- 1; b
+= step
, a
++)
266 MPZ_MUL_1_POS (ret
, ret
, b
);
/* nm == 2: seed with the first two terms, then multiply pairs.  */
269 MPZ_SET_1_NZ (ret
, start
* (start
+ step
));
272 for (b
= start
+ 2 * step
, a
= count
/ 2 - 1; a
!= 0;
274 MPZ_MUL_1_POS (ret
, ret
, b
* (b
+ step
));
/* Odd count leaves one trailing term to fold in.  */
276 MPZ_MUL_1_POS (ret
, ret
, b
);
/* nm == 3: seed with two or three terms depending on count mod 3,
   then multiply triples.  */
281 MPZ_SET_1_NZ (ret
, start
* (start
+ step
));
284 MPZ_SET_1_NZ (ret
, start
* (start
+ step
) * (start
+ 2 * step
));
287 for (b
= start
+ 3 * step
, a
= count
/ 3 - 1; a
!= 0;
289 MPZ_MUL_1_POS (ret
, ret
, b
* (b
+ step
) * (b
+ 2 * step
));
/* Fold in the leftover term when count is not a multiple of 3.  */
293 MPZ_MUL_1_POS (ret
, ret
, b
);
295 default: /* ie nm=4 */
/* nm == 4: seed with up to four terms per count mod 4, then
   multiply quadruples.  */
298 MPZ_SET_1_NZ (ret
, start
* (start
+ step
));
303 MPZ_SET_1_NZ (ret
, start
* (start
+ step
) * (start
+ 2 * step
));
307 start
* (start
+ step
) * (start
+ 2 * step
) * (start
+
311 for (b
= start
+ 4 * step
, a
= count
/ 4 - 1; a
!= 0;
313 MPZ_MUL_1_POS (ret
, ret
,
314 b
* (b
+ step
) * (b
+ 2 * step
) * (b
+ 3 * step
));
/* Leftover 1-3 terms are combined into b first, then folded in
   with a single multiply.  */
318 b
= b
* (b
+ step
) * (b
+ 2 * step
);
320 MPZ_MUL_1_POS (ret
, ret
, b
);
/* odd_product (low, high, st): product of the odd integers z with
   low < z <= high, returned in st[0]; st[1..] are scratch entries of
   geometrically decreasing size (see size note below).  Small ranges
   go straight to ap_product_small; large ranges are split binarily,
   with subproducts combined on the st[] stack.
   NOTE(review): this extract is missing lines from the original file
   (declarations of z, the FACMUL2/3/4 branch bodies, the loop body
   computing y and a, stack pushes/pops) and the function continues
   past the end of this extract; comments annotate only what is
   visible.  */
325 /* return value in st[0]
326 odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
327 so st[0] needs enough bits for above, st[1] needs half these bits and
328 st[2] needs 1/4 of these bits etc */
330 odd_product (unsigned long low
, unsigned long high
, mpz_t
* st
)
/* stc/stn track the st[] stack; nm is the per-multiply term grouping
   passed on to ap_product_small.  */
332 unsigned long stc
= 1, stn
= 0, n
, y
, mask
, a
, nm
= 1;
342 /* must have high>=low ? check this and remove test below */
/* Empty range: product is 1.  */
345 MPZ_SET_1_NZ (st
[0], 1);
/* Single odd value in range.  */
350 MPZ_SET_1_NZ (st
[0], low
);
/* Choose nm by how many odd factors fit in one limb at this
   magnitude (FACMUL2/3/4 thresholds).  */
353 if (high
<= FACMUL2
+ 2)
356 if (high
<= FACMUL3
+ 4)
359 if (high
<= FACMUL4
+ 6)
/* Convert the (low, high] odd range to a term count.  */
363 high
= (high
- low
) / 2 + 1; /* high is now count,high<=2^(BITS_PER_ULONG-1) */
/* Small enough: one direct arithmetic-progression product of the
   odd numbers (step 2).  */
364 if (high
<= (((unsigned long) 1) << APCONST
))
366 ap_product_small (st
[0], (mp_limb_t
) low
, CNST_LIMB(2), high
, nm
);
/* Large range: n = number of binary split levels, mask = 2^n.  */
369 count_leading_zeros (n
, (mp_limb_t
) high
);
370 /* assumes clz above is LIMB based not NUMB based */
371 n
= GMP_LIMB_BITS
- n
- APCONST
;
372 mask
= (((unsigned long) 1) << n
);
375 /* have 2^(BITS_IN_N-APCONST) iterations so need
376 (BITS_IN_N-APCONST+1) stack entries */
/* Iterate the 2^n chunks; each chunk's subrange is derived from the
   (bit-reversed -- presumably via BITREV_ULONG in an elided line;
   verify against the original) counter y.  */
377 for (z
= mask
; z
>= 0; z
--)
380 y
>>= (BITS_PER_ULONG
- n
);
381 ap_product_small (st
[stn
],
382 (mp_limb_t
) (low
+ 2 * ((~y
) & mask
)), (mp_limb_t
) a
,
383 (high
+ y
) >> n
, nm
);
384 ASSERT (((high
+ y
) >> n
) <= (((unsigned long) 1) << APCONST
));
/* Combine the top two stack entries when their chunk counts match
   (stack-based pairwise reduction).  */
389 mpz_mul (st
[stn
- 2], st
[stn
- 2], st
[stn
- 1]);
/* NOTE(review): function body continues beyond this extract.  */