btxld(8): Convert to hostprog.
[dragonfly.git] / contrib / gmp / gen-psqr.c
blob9c33d7a681056e7849b6037073497e0ab5bd6520
1 /* Generate perfect square testing data.
3 Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
20 #include <stdio.h>
21 #include <stdlib.h>
23 #include "dumbmp.c"
26 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
27 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
28 residues and non-residues modulo small factors of that modulus.
30 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
31 function exists specifically because 2^24-1 and 2^48-1 have nice sets of
32 prime factors. For other limb sizes it's considered, but if it doesn't
33 have good factors then mpn_mod_1 will be used instead.
35 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
36 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
37 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
38 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
39 calculation within a single limb.
41 In either case primes can be combined to make divisors. The table data
42 then effectively indicates remainders which are quadratic residues mod
43 all the primes. This sort of combining reduces the number of steps
44 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
45 Nothing is gained or lost in terms of detections, the same total fraction
46 of non-residues will be identified.
48 Nothing particularly sophisticated is attempted for combining factors to
49 make divisors. This is probably a kind of knapsack problem so it'd be
50 too hard to attempt anything completely general. For the usual 32 and 64
51 bit limbs we get a good enough result just pairing the biggest and
52 smallest which fit together, repeatedly.
54 Another aim is to get powerful combinations, ie. divisors which identify
55 biggest fraction of non-residues, and have those run first. Again for
56 the usual 32 and 64 bits it seems good enough just to pair for big
57 divisors then sort according to the resulting fraction of non-residues
58 identified.
60 Also in this program, a table sq_res_0x100 of residues modulo 256 is
61 generated. This simply fills bits into limbs of the appropriate
62 build-time GMP_LIMB_BITS each.
67 /* Normally we aren't using const in gen*.c programs, so as not to have to
68 bother figuring out if it works, but using it with f_cmp_divisor and
69 f_cmp_fraction avoids warnings from the qsort calls. */
71 /* Same tests as gmp.h. */
72 #if defined (__STDC__) \
73 || defined (__cplusplus) \
74 || defined (_AIX) \
75 || defined (__DECC) \
76 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
77 || defined (_MSC_VER) \
78 || defined (_WIN32)
79 #define HAVE_CONST 1
80 #endif
82 #if ! HAVE_CONST
83 #define const
84 #endif
87 mpz_t *sq_res_0x100; /* table of limbs */
88 int nsq_res_0x100; /* elements in sq_res_0x100 array */
89 int sq_res_0x100_num; /* squares in sq_res_0x100 */
90 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
92 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
93 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
94 int max_divisor; /* all divisors <= max_divisor */
95 int max_divisor_bits; /* ceil(log2(max_divisor)) */
96 double total_fraction; /* of squares */
97 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
98 mpz_t pp_norm; /* pp shifted so NUMB high bit set */
99 mpz_t pp_inverted; /* invert_limb style inverse */
100 mpz_t mod_mask; /* 2^mod_bits-1 */
101 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
103 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
104 struct rawfactor_t {
105 int divisor;
106 int multiplicity;
108 struct rawfactor_t *rawfactor;
109 int nrawfactor;
111 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
112 struct factor_t {
113 int divisor;
114 mpz_t inverse; /* 1/divisor mod 2^mod_bits */
115 mpz_t mask; /* indicating squares mod divisor */
116 double fraction; /* squares/total */
118 struct factor_t *factor;
119 int nfactor; /* entries in use in factor array */
120 int factor_alloc; /* entries allocated to factor array */
124 f_cmp_divisor (const void *parg, const void *qarg)
126 const struct factor_t *p, *q;
127 p = parg;
128 q = qarg;
129 if (p->divisor > q->divisor)
130 return 1;
131 else if (p->divisor < q->divisor)
132 return -1;
133 else
134 return 0;
138 f_cmp_fraction (const void *parg, const void *qarg)
140 const struct factor_t *p, *q;
141 p = parg;
142 q = qarg;
143 if (p->fraction > q->fraction)
144 return 1;
145 else if (p->fraction < q->fraction)
146 return -1;
147 else
148 return 0;
151 /* Remove array[idx] by copying the remainder down, and adjust narray
152 accordingly. */
153 #define COLLAPSE_ELEMENT(array, idx, narray) \
154 do { \
155 mem_copyi ((char *) &(array)[idx], \
156 (char *) &(array)[idx+1], \
157 ((narray)-((idx)+1)) * sizeof (array[0])); \
158 (narray)--; \
159 } while (0)
162 /* return n*2^p mod m */
164 mul_2exp_mod (int n, int p, int m)
166 int i;
167 for (i = 0; i < p; i++)
168 n = (2 * n) % m;
169 return n;
172 /* return -n mod m */
174 neg_mod (int n, int m)
176 ASSERT (n >= 0 && n < m);
177 return (n == 0 ? 0 : m-n);
180 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
181 "-(idx<<mod_bits)" can be a square modulo m. */
182 void
183 square_mask (mpz_t mask, int m)
185 int p, i, r, idx;
187 p = mul_2exp_mod (1, mod_bits, m);
188 p = neg_mod (p, m);
190 mpz_set_ui (mask, 0L);
191 for (i = 0; i < m; i++)
193 r = (i * i) % m;
194 idx = (r * p) % m;
195 mpz_setbit (mask, (unsigned long) idx);
199 void
200 generate_sq_res_0x100 (int limb_bits)
202 int i, res;
204 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
205 sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
207 for (i = 0; i < nsq_res_0x100; i++)
208 mpz_init_set_ui (sq_res_0x100[i], 0L);
210 for (i = 0; i < 0x100; i++)
212 res = (i * i) % 0x100;
213 mpz_setbit (sq_res_0x100[res / limb_bits],
214 (unsigned long) (res % limb_bits));
217 sq_res_0x100_num = 0;
218 for (i = 0; i < nsq_res_0x100; i++)
219 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
220 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
223 void
224 generate_mod (int limb_bits, int nail_bits)
226 int numb_bits = limb_bits - nail_bits;
227 int i, divisor;
229 mpz_init_set_ui (pp, 0L);
230 mpz_init_set_ui (pp_norm, 0L);
231 mpz_init_set_ui (pp_inverted, 0L);
233 /* no more than limb_bits many factors in a one limb modulus (and of
234 course in reality nothing like that many) */
235 factor_alloc = limb_bits;
236 factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
237 rawfactor = (struct rawfactor_t *)
238 xmalloc (factor_alloc * sizeof (*rawfactor));
240 if (numb_bits % 4 != 0)
242 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
243 goto use_pp;
246 max_divisor = 2*limb_bits;
247 max_divisor_bits = log2_ceil (max_divisor);
249 if (numb_bits / 4 < max_divisor_bits)
251 /* Wind back to one limb worth of max_divisor, if that will let us use
252 mpn_mod_34lsub1. */
253 max_divisor = limb_bits;
254 max_divisor_bits = log2_ceil (max_divisor);
256 if (numb_bits / 4 < max_divisor_bits)
258 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
259 goto use_pp;
264 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
265 mpz_t m, q, r;
266 int multiplicity;
268 mod34_bits = (numb_bits / 4) * 3;
270 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
271 the mod34_bits mark, adding the two halves for a remainder of at most
272 mod34_bits+1 many bits */
273 mod_bits = mod34_bits + 1;
275 mpz_init_set_ui (m, 1L);
276 mpz_mul_2exp (m, m, mod34_bits);
277 mpz_sub_ui (m, m, 1L);
279 mpz_init (q);
280 mpz_init (r);
282 for (i = 3; i <= max_divisor; i++)
284 if (! isprime (i))
285 continue;
287 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
288 if (mpz_sgn (r) != 0)
289 continue;
291 /* if a repeated prime is found it's used as an i^n in one factor */
292 divisor = 1;
293 multiplicity = 0;
296 if (divisor > max_divisor / i)
297 break;
298 multiplicity++;
299 mpz_set (m, q);
300 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
302 while (mpz_sgn (r) == 0);
304 ASSERT (nrawfactor < factor_alloc);
305 rawfactor[nrawfactor].divisor = i;
306 rawfactor[nrawfactor].multiplicity = multiplicity;
307 nrawfactor++;
310 mpz_clear (m);
311 mpz_clear (q);
312 mpz_clear (r);
315 if (nrawfactor <= 2)
317 mpz_t new_pp;
319 sprintf (mod34_excuse, "only %d small factor%s",
320 nrawfactor, nrawfactor == 1 ? "" : "s");
322 use_pp:
323 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
324 tried with just one */
325 max_divisor = 2*limb_bits;
326 max_divisor_bits = log2_ceil (max_divisor);
328 mpz_init (new_pp);
329 nrawfactor = 0;
330 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
332 /* one copy of each small prime */
333 mpz_set_ui (pp, 1L);
334 for (i = 3; i <= max_divisor; i++)
336 if (! isprime (i))
337 continue;
339 mpz_mul_ui (new_pp, pp, (unsigned long) i);
340 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
341 break;
342 mpz_set (pp, new_pp);
344 ASSERT (nrawfactor < factor_alloc);
345 rawfactor[nrawfactor].divisor = i;
346 rawfactor[nrawfactor].multiplicity = 1;
347 nrawfactor++;
350 /* Plus an extra copy of one or more of the primes selected, if that
351 still fits in max_divisor and the total in mod_bits. Usually only
352 3 or 5 will be candidates */
353 for (i = nrawfactor-1; i >= 0; i--)
355 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
356 continue;
357 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
358 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
359 continue;
360 mpz_set (pp, new_pp);
362 rawfactor[i].multiplicity++;
365 mod_bits = mpz_sizeinbase (pp, 2);
367 mpz_set (pp_norm, pp);
368 while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
369 mpz_add (pp_norm, pp_norm, pp_norm);
371 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
373 mpz_clear (new_pp);
376 /* start the factor array */
377 for (i = 0; i < nrawfactor; i++)
379 int j;
380 ASSERT (nfactor < factor_alloc);
381 factor[nfactor].divisor = 1;
382 for (j = 0; j < rawfactor[i].multiplicity; j++)
383 factor[nfactor].divisor *= rawfactor[i].divisor;
384 nfactor++;
387 combine:
388 /* Combine entries in the factor array. Combine the smallest entry with
389 the biggest one that will fit with it (ie. under max_divisor), then
390 repeat that with the new smallest entry. */
391 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
392 for (i = nfactor-1; i >= 1; i--)
394 if (factor[i].divisor <= max_divisor / factor[0].divisor)
396 factor[0].divisor *= factor[i].divisor;
397 COLLAPSE_ELEMENT (factor, i, nfactor);
398 goto combine;
402 total_fraction = 1.0;
403 for (i = 0; i < nfactor; i++)
405 mpz_init (factor[i].inverse);
406 mpz_invert_ui_2exp (factor[i].inverse,
407 (unsigned long) factor[i].divisor,
408 (unsigned long) mod_bits);
410 mpz_init (factor[i].mask);
411 square_mask (factor[i].mask, factor[i].divisor);
413 /* fraction of possible squares */
414 factor[i].fraction = (double) mpz_popcount (factor[i].mask)
415 / factor[i].divisor;
417 /* total fraction of possible squares */
418 total_fraction *= factor[i].fraction;
421 /* best tests first (ie. smallest fraction) */
422 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
425 void
426 print (int limb_bits, int nail_bits)
428 int i;
429 mpz_t mhi, mlo;
431 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
432 printf ("\n");
434 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
435 limb_bits, nail_bits);
436 printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
437 limb_bits, nail_bits);
438 printf ("#endif\n");
439 printf ("\n");
441 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
442 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
443 (1.0 - sq_res_0x100_fraction) * 100.0,
444 0x100 - sq_res_0x100_num);
445 printf ("static const mp_limb_t\n");
446 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
447 for (i = 0; i < nsq_res_0x100; i++)
449 printf (" CNST_LIMB(0x");
450 mpz_out_str (stdout, 16, sq_res_0x100[i]);
451 printf ("),\n");
453 printf ("};\n");
454 printf ("\n");
456 if (mpz_sgn (pp) != 0)
458 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
459 printf ("/* PERFSQR_PP = ");
461 else
462 printf ("/* 2^%d-1 = ", mod34_bits);
463 for (i = 0; i < nrawfactor; i++)
465 if (i != 0)
466 printf (" * ");
467 printf ("%d", rawfactor[i].divisor);
468 if (rawfactor[i].multiplicity != 1)
469 printf ("^%d", rawfactor[i].multiplicity);
471 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
473 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
474 if (mpz_sgn (pp) != 0)
476 printf ("#define PERFSQR_PP CNST_LIMB(0x");
477 mpz_out_str (stdout, 16, pp);
478 printf (")\n");
479 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
480 mpz_out_str (stdout, 16, pp_norm);
481 printf (")\n");
482 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
483 mpz_out_str (stdout, 16, pp_inverted);
484 printf (")\n");
486 printf ("\n");
488 mpz_init (mhi);
489 mpz_init (mlo);
491 printf ("/* This test identifies %.2f%% as non-squares. */\n",
492 (1.0 - total_fraction) * 100.0);
493 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
494 printf (" do { \\\n");
495 printf (" mp_limb_t r; \\\n");
496 if (mpz_sgn (pp) != 0)
497 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
498 else
499 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
501 for (i = 0; i < nfactor; i++)
503 printf (" \\\n");
504 printf (" /* %5.2f%% */ \\\n",
505 (1.0 - factor[i].fraction) * 100.0);
507 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
508 factor[i].divisor <= limb_bits ? 1 : 2,
509 factor[i].divisor);
510 mpz_out_str (stdout, 16, factor[i].inverse);
511 printf ("), \\\n");
512 printf (" CNST_LIMB(0x");
514 if ( factor[i].divisor <= limb_bits)
516 mpz_out_str (stdout, 16, factor[i].mask);
518 else
520 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
521 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
522 mpz_out_str (stdout, 16, mhi);
523 printf ("), CNST_LIMB(0x");
524 mpz_out_str (stdout, 16, mlo);
526 printf (")); \\\n");
529 printf (" } while (0)\n");
530 printf ("\n");
532 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
533 (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
534 printf ("\n");
536 printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
537 printf ("#define PERFSQR_DIVISORS { 256,");
538 for (i = 0; i < nfactor; i++)
539 printf (" %d,", factor[i].divisor);
540 printf (" }\n");
543 mpz_clear (mhi);
544 mpz_clear (mlo);
548 main (int argc, char *argv[])
550 int limb_bits, nail_bits;
552 if (argc != 3)
554 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
555 exit (1);
558 limb_bits = atoi (argv[1]);
559 nail_bits = atoi (argv[2]);
561 if (limb_bits <= 0
562 || nail_bits < 0
563 || nail_bits >= limb_bits)
565 fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
566 limb_bits, nail_bits);
567 exit (1);
570 generate_sq_res_0x100 (limb_bits);
571 generate_mod (limb_bits, nail_bits);
573 print (limb_bits, nail_bits);
575 return 0;