miniupnpd 1.9 (20160113)
[tomato.git] / release / src / router / gmp / gen-psqr.c
blob31977cda1c2b5617d15faa532008c6e8b7deff18
1 /* Generate perfect square testing data.
3 Copyright 2002-2004, 2012 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
20 or both in parallel, as here.
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 for more details.
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library. If not,
29 see https://www.gnu.org/licenses/. */
31 #include <stdio.h>
32 #include <stdlib.h>
34 #include "bootstrap.c"
37 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
38 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
39 residues and non-residues modulo small factors of that modulus.
41 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
42 function exists specifically because 2^24-1 and 2^48-1 have nice sets of
43 prime factors. For other limb sizes it's considered, but if it doesn't
44 have good factors then mpn_mod_1 will be used instead.
46 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
47 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
48 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
49 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
50 calculation within a single limb.
52 In either case primes can be combined to make divisors. The table data
53 then effectively indicates remainders which are quadratic residues mod
54 all the primes. This sort of combining reduces the number of steps
55 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
56 Nothing is gained or lost in terms of detections, the same total fraction
57 of non-residues will be identified.
59 Nothing particularly sophisticated is attempted for combining factors to
60 make divisors. This is probably a kind of knapsack problem so it'd be
61 too hard to attempt anything completely general. For the usual 32 and 64
62 bit limbs we get a good enough result just pairing the biggest and
63 smallest which fit together, repeatedly.
65 Another aim is to get powerful combinations, ie. divisors which identify
66 biggest fraction of non-residues, and have those run first. Again for
67 the usual 32 and 64 bits it seems good enough just to pair for big
68 divisors then sort according to the resulting fraction of non-residues
69 identified.
71 Also in this program, a table sq_res_0x100 of residues modulo 256 is
72 generated. This simply fills bits into limbs of the appropriate
73 build-time GMP_LIMB_BITS each.
78 /* Normally we aren't using const in gen*.c programs, so as not to have to
79 bother figuring out if it works, but using it with f_cmp_divisor and
80 f_cmp_fraction avoids warnings from the qsort calls. */
82 /* Same tests as gmp.h. */
83 #if defined (__STDC__) \
84 || defined (__cplusplus) \
85 || defined (_AIX) \
86 || defined (__DECC) \
87 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
88 || defined (_MSC_VER) \
89 || defined (_WIN32)
90 #define HAVE_CONST 1
91 #endif
93 #if ! HAVE_CONST
94 #define const
95 #endif
98 mpz_t *sq_res_0x100; /* table of limbs */
99 int nsq_res_0x100; /* elements in sq_res_0x100 array */
100 int sq_res_0x100_num; /* squares in sq_res_0x100 */
101 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */
103 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */
104 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */
105 int max_divisor; /* all divisors <= max_divisor */
106 int max_divisor_bits; /* ceil(log2(max_divisor)) */
107 double total_fraction; /* of squares */
108 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */
109 mpz_t pp_norm; /* pp shifted so NUMB high bit set */
110 mpz_t pp_inverted; /* invert_limb style inverse */
111 mpz_t mod_mask; /* 2^mod_bits-1 */
112 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
114 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
115 struct rawfactor_t {
116 int divisor;
117 int multiplicity;
119 struct rawfactor_t *rawfactor;
120 int nrawfactor;
122 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
123 struct factor_t {
124 int divisor;
125 mpz_t inverse; /* 1/divisor mod 2^mod_bits */
126 mpz_t mask; /* indicating squares mod divisor */
127 double fraction; /* squares/total */
129 struct factor_t *factor;
130 int nfactor; /* entries in use in factor array */
131 int factor_alloc; /* entries allocated to factor array */
135 f_cmp_divisor (const void *parg, const void *qarg)
137 const struct factor_t *p, *q;
138 p = parg;
139 q = qarg;
140 if (p->divisor > q->divisor)
141 return 1;
142 else if (p->divisor < q->divisor)
143 return -1;
144 else
145 return 0;
149 f_cmp_fraction (const void *parg, const void *qarg)
151 const struct factor_t *p, *q;
152 p = parg;
153 q = qarg;
154 if (p->fraction > q->fraction)
155 return 1;
156 else if (p->fraction < q->fraction)
157 return -1;
158 else
159 return 0;
162 /* Remove array[idx] by copying the remainder down, and adjust narray
163 accordingly. */
164 #define COLLAPSE_ELEMENT(array, idx, narray) \
165 do { \
166 memmove (&(array)[idx], \
167 &(array)[idx+1], \
168 ((narray)-((idx)+1)) * sizeof (array[0])); \
169 (narray)--; \
170 } while (0)
173 /* return n*2^p mod m */
175 mul_2exp_mod (int n, int p, int m)
177 int i;
178 for (i = 0; i < p; i++)
179 n = (2 * n) % m;
180 return n;
183 /* return -n mod m */
185 neg_mod (int n, int m)
187 assert (n >= 0 && n < m);
188 return (n == 0 ? 0 : m-n);
191 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
192 "-(idx<<mod_bits)" can be a square modulo m. */
193 void
194 square_mask (mpz_t mask, int m)
196 int p, i, r, idx;
198 p = mul_2exp_mod (1, mod_bits, m);
199 p = neg_mod (p, m);
201 mpz_set_ui (mask, 0L);
202 for (i = 0; i < m; i++)
204 r = (i * i) % m;
205 idx = (r * p) % m;
206 mpz_setbit (mask, (unsigned long) idx);
210 void
211 generate_sq_res_0x100 (int limb_bits)
213 int i, res;
215 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
216 sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
218 for (i = 0; i < nsq_res_0x100; i++)
219 mpz_init_set_ui (sq_res_0x100[i], 0L);
221 for (i = 0; i < 0x100; i++)
223 res = (i * i) % 0x100;
224 mpz_setbit (sq_res_0x100[res / limb_bits],
225 (unsigned long) (res % limb_bits));
228 sq_res_0x100_num = 0;
229 for (i = 0; i < nsq_res_0x100; i++)
230 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
231 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
234 void
235 generate_mod (int limb_bits, int nail_bits)
237 int numb_bits = limb_bits - nail_bits;
238 int i, divisor;
240 mpz_init_set_ui (pp, 0L);
241 mpz_init_set_ui (pp_norm, 0L);
242 mpz_init_set_ui (pp_inverted, 0L);
244 /* no more than limb_bits many factors in a one limb modulus (and of
245 course in reality nothing like that many) */
246 factor_alloc = limb_bits;
247 factor = xmalloc (factor_alloc * sizeof (*factor));
248 rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor));
250 if (numb_bits % 4 != 0)
252 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
253 goto use_pp;
256 max_divisor = 2*limb_bits;
257 max_divisor_bits = log2_ceil (max_divisor);
259 if (numb_bits / 4 < max_divisor_bits)
261 /* Wind back to one limb worth of max_divisor, if that will let us use
262 mpn_mod_34lsub1. */
263 max_divisor = limb_bits;
264 max_divisor_bits = log2_ceil (max_divisor);
266 if (numb_bits / 4 < max_divisor_bits)
268 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
269 goto use_pp;
274 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
275 mpz_t m, q, r;
276 int multiplicity;
278 mod34_bits = (numb_bits / 4) * 3;
280 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
281 the mod34_bits mark, adding the two halves for a remainder of at most
282 mod34_bits+1 many bits */
283 mod_bits = mod34_bits + 1;
285 mpz_init_set_ui (m, 1L);
286 mpz_mul_2exp (m, m, mod34_bits);
287 mpz_sub_ui (m, m, 1L);
289 mpz_init (q);
290 mpz_init (r);
292 for (i = 3; i <= max_divisor; i++)
294 if (! isprime (i))
295 continue;
297 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
298 if (mpz_sgn (r) != 0)
299 continue;
301 /* if a repeated prime is found it's used as an i^n in one factor */
302 divisor = 1;
303 multiplicity = 0;
306 if (divisor > max_divisor / i)
307 break;
308 multiplicity++;
309 mpz_set (m, q);
310 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
312 while (mpz_sgn (r) == 0);
314 assert (nrawfactor < factor_alloc);
315 rawfactor[nrawfactor].divisor = i;
316 rawfactor[nrawfactor].multiplicity = multiplicity;
317 nrawfactor++;
320 mpz_clear (m);
321 mpz_clear (q);
322 mpz_clear (r);
325 if (nrawfactor <= 2)
327 mpz_t new_pp;
329 sprintf (mod34_excuse, "only %d small factor%s",
330 nrawfactor, nrawfactor == 1 ? "" : "s");
332 use_pp:
333 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
334 tried with just one */
335 max_divisor = 2*limb_bits;
336 max_divisor_bits = log2_ceil (max_divisor);
338 mpz_init (new_pp);
339 nrawfactor = 0;
340 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
342 /* one copy of each small prime */
343 mpz_set_ui (pp, 1L);
344 for (i = 3; i <= max_divisor; i++)
346 if (! isprime (i))
347 continue;
349 mpz_mul_ui (new_pp, pp, (unsigned long) i);
350 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
351 break;
352 mpz_set (pp, new_pp);
354 assert (nrawfactor < factor_alloc);
355 rawfactor[nrawfactor].divisor = i;
356 rawfactor[nrawfactor].multiplicity = 1;
357 nrawfactor++;
360 /* Plus an extra copy of one or more of the primes selected, if that
361 still fits in max_divisor and the total in mod_bits. Usually only
362 3 or 5 will be candidates */
363 for (i = nrawfactor-1; i >= 0; i--)
365 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
366 continue;
367 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
368 if (mpz_sizeinbase (new_pp, 2) > mod_bits)
369 continue;
370 mpz_set (pp, new_pp);
372 rawfactor[i].multiplicity++;
375 mod_bits = mpz_sizeinbase (pp, 2);
377 mpz_set (pp_norm, pp);
378 while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
379 mpz_add (pp_norm, pp_norm, pp_norm);
381 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
383 mpz_clear (new_pp);
386 /* start the factor array */
387 for (i = 0; i < nrawfactor; i++)
389 int j;
390 assert (nfactor < factor_alloc);
391 factor[nfactor].divisor = 1;
392 for (j = 0; j < rawfactor[i].multiplicity; j++)
393 factor[nfactor].divisor *= rawfactor[i].divisor;
394 nfactor++;
397 combine:
398 /* Combine entries in the factor array. Combine the smallest entry with
399 the biggest one that will fit with it (ie. under max_divisor), then
400 repeat that with the new smallest entry. */
401 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
402 for (i = nfactor-1; i >= 1; i--)
404 if (factor[i].divisor <= max_divisor / factor[0].divisor)
406 factor[0].divisor *= factor[i].divisor;
407 COLLAPSE_ELEMENT (factor, i, nfactor);
408 goto combine;
412 total_fraction = 1.0;
413 for (i = 0; i < nfactor; i++)
415 mpz_init (factor[i].inverse);
416 mpz_invert_ui_2exp (factor[i].inverse,
417 (unsigned long) factor[i].divisor,
418 (unsigned long) mod_bits);
420 mpz_init (factor[i].mask);
421 square_mask (factor[i].mask, factor[i].divisor);
423 /* fraction of possible squares */
424 factor[i].fraction = (double) mpz_popcount (factor[i].mask)
425 / factor[i].divisor;
427 /* total fraction of possible squares */
428 total_fraction *= factor[i].fraction;
431 /* best tests first (ie. smallest fraction) */
432 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
435 void
436 print (int limb_bits, int nail_bits)
438 int i;
439 mpz_t mhi, mlo;
441 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
442 printf ("\n");
444 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
445 limb_bits, nail_bits);
446 printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
447 limb_bits, nail_bits);
448 printf ("#endif\n");
449 printf ("\n");
451 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
452 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
453 (1.0 - sq_res_0x100_fraction) * 100.0,
454 0x100 - sq_res_0x100_num);
455 printf ("static const mp_limb_t\n");
456 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
457 for (i = 0; i < nsq_res_0x100; i++)
459 printf (" CNST_LIMB(0x");
460 mpz_out_str (stdout, 16, sq_res_0x100[i]);
461 printf ("),\n");
463 printf ("};\n");
464 printf ("\n");
466 if (mpz_sgn (pp) != 0)
468 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
469 printf ("/* PERFSQR_PP = ");
471 else
472 printf ("/* 2^%d-1 = ", mod34_bits);
473 for (i = 0; i < nrawfactor; i++)
475 if (i != 0)
476 printf (" * ");
477 printf ("%d", rawfactor[i].divisor);
478 if (rawfactor[i].multiplicity != 1)
479 printf ("^%d", rawfactor[i].multiplicity);
481 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
483 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
484 if (mpz_sgn (pp) != 0)
486 printf ("#define PERFSQR_PP CNST_LIMB(0x");
487 mpz_out_str (stdout, 16, pp);
488 printf (")\n");
489 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
490 mpz_out_str (stdout, 16, pp_norm);
491 printf (")\n");
492 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
493 mpz_out_str (stdout, 16, pp_inverted);
494 printf (")\n");
496 printf ("\n");
498 mpz_init (mhi);
499 mpz_init (mlo);
501 printf ("/* This test identifies %.2f%% as non-squares. */\n",
502 (1.0 - total_fraction) * 100.0);
503 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
504 printf (" do { \\\n");
505 printf (" mp_limb_t r; \\\n");
506 if (mpz_sgn (pp) != 0)
507 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
508 else
509 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
511 for (i = 0; i < nfactor; i++)
513 printf (" \\\n");
514 printf (" /* %5.2f%% */ \\\n",
515 (1.0 - factor[i].fraction) * 100.0);
517 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
518 factor[i].divisor <= limb_bits ? 1 : 2,
519 factor[i].divisor);
520 mpz_out_str (stdout, 16, factor[i].inverse);
521 printf ("), \\\n");
522 printf (" CNST_LIMB(0x");
524 if ( factor[i].divisor <= limb_bits)
526 mpz_out_str (stdout, 16, factor[i].mask);
528 else
530 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
531 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
532 mpz_out_str (stdout, 16, mhi);
533 printf ("), CNST_LIMB(0x");
534 mpz_out_str (stdout, 16, mlo);
536 printf (")); \\\n");
539 printf (" } while (0)\n");
540 printf ("\n");
542 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
543 (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
544 printf ("\n");
546 printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
547 printf ("#define PERFSQR_DIVISORS { 256,");
548 for (i = 0; i < nfactor; i++)
549 printf (" %d,", factor[i].divisor);
550 printf (" }\n");
553 mpz_clear (mhi);
554 mpz_clear (mlo);
558 main (int argc, char *argv[])
560 int limb_bits, nail_bits;
562 if (argc != 3)
564 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
565 exit (1);
568 limb_bits = atoi (argv[1]);
569 nail_bits = atoi (argv[2]);
571 if (limb_bits <= 0
572 || nail_bits < 0
573 || nail_bits >= limb_bits)
575 fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
576 limb_bits, nail_bits);
577 exit (1);
580 generate_sq_res_0x100 (limb_bits);
581 generate_mod (limb_bits, nail_bits);
583 print (limb_bits, nail_bits);
585 return 0;