1 /* Generate perfect square testing data.
3 Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
27 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
28 residues and non-residues modulo small factors of that modulus.
30 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That
31 function exists specifically because 2^24-1 and 2^48-1 have nice sets of
32 prime factors. For other limb sizes it's considered, but if it doesn't
33 have good factors then mpn_mod_1 will be used instead.
35 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
36 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
37 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
38 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
39 calculation within a single limb.
41 In either case primes can be combined to make divisors. The table data
42 then effectively indicates remainders which are quadratic residues mod
43 all the primes. This sort of combining reduces the number of steps
44 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
45 Nothing is gained or lost in terms of detections, the same total fraction
46 of non-residues will be identified.
48 Nothing particularly sophisticated is attempted for combining factors to
49 make divisors. This is probably a kind of knapsack problem so it'd be
50 too hard to attempt anything completely general. For the usual 32 and 64
51 bit limbs we get a good enough result just pairing the biggest and
52 smallest which fit together, repeatedly.
54 Another aim is to get powerful combinations, ie. divisors which identify
55 biggest fraction of non-residues, and have those run first. Again for
56 the usual 32 and 64 bits it seems good enough just to pair for big
57 divisors then sort according to the resulting fraction of non-residues
60 Also in this program, a table sq_res_0x100 of residues modulo 256 is
61 generated. This simply fills bits into limbs of the appropriate
62 build-time GMP_LIMB_BITS each.
67 /* Normally we aren't using const in gen*.c programs, so as not to have to
68 bother figuring out if it works, but using it with f_cmp_divisor and
69 f_cmp_fraction avoids warnings from the qsort calls. */
71 /* Same tests as gmp.h. */
72 #if defined (__STDC__) \
73 || defined (__cplusplus) \
76 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
77 || defined (_MSC_VER) \
87 mpz_t
*sq_res_0x100
; /* table of limbs */
88 int nsq_res_0x100
; /* elements in sq_res_0x100 array */
89 int sq_res_0x100_num
; /* squares in sq_res_0x100 */
90 double sq_res_0x100_fraction
; /* sq_res_0x100_num / 256 */
92 int mod34_bits
; /* 3*GMP_NUMB_BITS/4 */
93 int mod_bits
; /* bits from PERFSQR_MOD_34 or MOD_PP */
94 int max_divisor
; /* all divisors <= max_divisor */
95 int max_divisor_bits
; /* ceil(log2(max_divisor)) */
96 double total_fraction
; /* of squares */
97 mpz_t pp
; /* product of primes, or 0 if mod_34lsub1 used */
98 mpz_t pp_norm
; /* pp shifted so NUMB high bit set */
99 mpz_t pp_inverted
; /* invert_limb style inverse */
100 mpz_t mod_mask
; /* 2^mod_bits-1 */
101 char mod34_excuse
[128]; /* why mod_34lsub1 not used (if it's not) */
103 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
108 struct rawfactor_t
*rawfactor
;
111 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
114 mpz_t inverse
; /* 1/divisor mod 2^mod_bits */
115 mpz_t mask
; /* indicating squares mod divisor */
116 double fraction
; /* squares/total */
118 struct factor_t
*factor
;
119 int nfactor
; /* entries in use in factor array */
120 int factor_alloc
; /* entries allocated to factor array */
124 f_cmp_divisor (const void *parg
, const void *qarg
)
126 const struct factor_t
*p
, *q
;
129 if (p
->divisor
> q
->divisor
)
131 else if (p
->divisor
< q
->divisor
)
138 f_cmp_fraction (const void *parg
, const void *qarg
)
140 const struct factor_t
*p
, *q
;
143 if (p
->fraction
> q
->fraction
)
145 else if (p
->fraction
< q
->fraction
)
151 /* Remove array[idx] by copying the remainder down, and adjust narray
153 #define COLLAPSE_ELEMENT(array, idx, narray) \
155 mem_copyi ((char *) &(array)[idx], \
156 (char *) &(array)[idx+1], \
157 ((narray)-((idx)+1)) * sizeof (array[0])); \
162 /* return n*2^p mod m */
164 mul_2exp_mod (int n
, int p
, int m
)
167 for (i
= 0; i
< p
; i
++)
172 /* return -n mod m */
174 neg_mod (int n
, int m
)
176 ASSERT (n
>= 0 && n
< m
);
177 return (n
== 0 ? 0 : m
-n
);
180 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
181 "-(idx<<mod_bits)" can be a square modulo m. */
183 square_mask (mpz_t mask
, int m
)
187 p
= mul_2exp_mod (1, mod_bits
, m
);
190 mpz_set_ui (mask
, 0L);
191 for (i
= 0; i
< m
; i
++)
195 mpz_setbit (mask
, (unsigned long) idx
);
200 generate_sq_res_0x100 (int limb_bits
)
204 nsq_res_0x100
= (0x100 + limb_bits
- 1) / limb_bits
;
205 sq_res_0x100
= (mpz_t
*) xmalloc (nsq_res_0x100
* sizeof (*sq_res_0x100
));
207 for (i
= 0; i
< nsq_res_0x100
; i
++)
208 mpz_init_set_ui (sq_res_0x100
[i
], 0L);
210 for (i
= 0; i
< 0x100; i
++)
212 res
= (i
* i
) % 0x100;
213 mpz_setbit (sq_res_0x100
[res
/ limb_bits
],
214 (unsigned long) (res
% limb_bits
));
217 sq_res_0x100_num
= 0;
218 for (i
= 0; i
< nsq_res_0x100
; i
++)
219 sq_res_0x100_num
+= mpz_popcount (sq_res_0x100
[i
]);
220 sq_res_0x100_fraction
= (double) sq_res_0x100_num
/ 256.0;
224 generate_mod (int limb_bits
, int nail_bits
)
226 int numb_bits
= limb_bits
- nail_bits
;
229 mpz_init_set_ui (pp
, 0L);
230 mpz_init_set_ui (pp_norm
, 0L);
231 mpz_init_set_ui (pp_inverted
, 0L);
233 /* no more than limb_bits many factors in a one limb modulus (and of
234 course in reality nothing like that many) */
235 factor_alloc
= limb_bits
;
236 factor
= (struct factor_t
*) xmalloc (factor_alloc
* sizeof (*factor
));
237 rawfactor
= (struct rawfactor_t
*)
238 xmalloc (factor_alloc
* sizeof (*rawfactor
));
240 if (numb_bits
% 4 != 0)
242 strcpy (mod34_excuse
, "GMP_NUMB_BITS % 4 != 0");
246 max_divisor
= 2*limb_bits
;
247 max_divisor_bits
= log2_ceil (max_divisor
);
249 if (numb_bits
/ 4 < max_divisor_bits
)
251 /* Wind back to one limb worth of max_divisor, if that will let us use
253 max_divisor
= limb_bits
;
254 max_divisor_bits
= log2_ceil (max_divisor
);
256 if (numb_bits
/ 4 < max_divisor_bits
)
258 strcpy (mod34_excuse
, "GMP_NUMB_BITS / 4 too small");
264 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
268 mod34_bits
= (numb_bits
/ 4) * 3;
270 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
271 the mod34_bits mark, adding the two halves for a remainder of at most
272 mod34_bits+1 many bits */
273 mod_bits
= mod34_bits
+ 1;
275 mpz_init_set_ui (m
, 1L);
276 mpz_mul_2exp (m
, m
, mod34_bits
);
277 mpz_sub_ui (m
, m
, 1L);
282 for (i
= 3; i
<= max_divisor
; i
++)
287 mpz_tdiv_qr_ui (q
, r
, m
, (unsigned long) i
);
288 if (mpz_sgn (r
) != 0)
291 /* if a repeated prime is found it's used as an i^n in one factor */
296 if (divisor
> max_divisor
/ i
)
300 mpz_tdiv_qr_ui (q
, r
, m
, (unsigned long) i
);
302 while (mpz_sgn (r
) == 0);
304 ASSERT (nrawfactor
< factor_alloc
);
305 rawfactor
[nrawfactor
].divisor
= i
;
306 rawfactor
[nrawfactor
].multiplicity
= multiplicity
;
319 sprintf (mod34_excuse
, "only %d small factor%s",
320 nrawfactor
, nrawfactor
== 1 ? "" : "s");
323 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
324 tried with just one */
325 max_divisor
= 2*limb_bits
;
326 max_divisor_bits
= log2_ceil (max_divisor
);
330 mod_bits
= MIN (numb_bits
, limb_bits
- max_divisor_bits
);
332 /* one copy of each small prime */
334 for (i
= 3; i
<= max_divisor
; i
++)
339 mpz_mul_ui (new_pp
, pp
, (unsigned long) i
);
340 if (mpz_sizeinbase (new_pp
, 2) > mod_bits
)
342 mpz_set (pp
, new_pp
);
344 ASSERT (nrawfactor
< factor_alloc
);
345 rawfactor
[nrawfactor
].divisor
= i
;
346 rawfactor
[nrawfactor
].multiplicity
= 1;
350 /* Plus an extra copy of one or more of the primes selected, if that
351 still fits in max_divisor and the total in mod_bits. Usually only
352 3 or 5 will be candidates */
353 for (i
= nrawfactor
-1; i
>= 0; i
--)
355 if (rawfactor
[i
].divisor
> max_divisor
/ rawfactor
[i
].divisor
)
357 mpz_mul_ui (new_pp
, pp
, (unsigned long) rawfactor
[i
].divisor
);
358 if (mpz_sizeinbase (new_pp
, 2) > mod_bits
)
360 mpz_set (pp
, new_pp
);
362 rawfactor
[i
].multiplicity
++;
365 mod_bits
= mpz_sizeinbase (pp
, 2);
367 mpz_set (pp_norm
, pp
);
368 while (mpz_sizeinbase (pp_norm
, 2) < numb_bits
)
369 mpz_add (pp_norm
, pp_norm
, pp_norm
);
371 mpz_preinv_invert (pp_inverted
, pp_norm
, numb_bits
);
376 /* start the factor array */
377 for (i
= 0; i
< nrawfactor
; i
++)
380 ASSERT (nfactor
< factor_alloc
);
381 factor
[nfactor
].divisor
= 1;
382 for (j
= 0; j
< rawfactor
[i
].multiplicity
; j
++)
383 factor
[nfactor
].divisor
*= rawfactor
[i
].divisor
;
388 /* Combine entries in the factor array. Combine the smallest entry with
389 the biggest one that will fit with it (ie. under max_divisor), then
390 repeat that with the new smallest entry. */
391 qsort (factor
, nfactor
, sizeof (factor
[0]), f_cmp_divisor
);
392 for (i
= nfactor
-1; i
>= 1; i
--)
394 if (factor
[i
].divisor
<= max_divisor
/ factor
[0].divisor
)
396 factor
[0].divisor
*= factor
[i
].divisor
;
397 COLLAPSE_ELEMENT (factor
, i
, nfactor
);
402 total_fraction
= 1.0;
403 for (i
= 0; i
< nfactor
; i
++)
405 mpz_init (factor
[i
].inverse
);
406 mpz_invert_ui_2exp (factor
[i
].inverse
,
407 (unsigned long) factor
[i
].divisor
,
408 (unsigned long) mod_bits
);
410 mpz_init (factor
[i
].mask
);
411 square_mask (factor
[i
].mask
, factor
[i
].divisor
);
413 /* fraction of possible squares */
414 factor
[i
].fraction
= (double) mpz_popcount (factor
[i
].mask
)
417 /* total fraction of possible squares */
418 total_fraction
*= factor
[i
].fraction
;
421 /* best tests first (ie. smallest fraction) */
422 qsort (factor
, nfactor
, sizeof (factor
[0]), f_cmp_fraction
);
426 print (int limb_bits
, int nail_bits
)
431 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
434 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
435 limb_bits
, nail_bits
);
436 printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
437 limb_bits
, nail_bits
);
441 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
442 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
443 (1.0 - sq_res_0x100_fraction
) * 100.0,
444 0x100 - sq_res_0x100_num
);
445 printf ("static const mp_limb_t\n");
446 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100
);
447 for (i
= 0; i
< nsq_res_0x100
; i
++)
449 printf (" CNST_LIMB(0x");
450 mpz_out_str (stdout
, 16, sq_res_0x100
[i
]);
456 if (mpz_sgn (pp
) != 0)
458 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse
);
459 printf ("/* PERFSQR_PP = ");
462 printf ("/* 2^%d-1 = ", mod34_bits
);
463 for (i
= 0; i
< nrawfactor
; i
++)
467 printf ("%d", rawfactor
[i
].divisor
);
468 if (rawfactor
[i
].multiplicity
!= 1)
469 printf ("^%d", rawfactor
[i
].multiplicity
);
471 printf (" %s*/\n", mpz_sgn (pp
) == 0 ? "... " : "");
473 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits
);
474 if (mpz_sgn (pp
) != 0)
476 printf ("#define PERFSQR_PP CNST_LIMB(0x");
477 mpz_out_str (stdout
, 16, pp
);
479 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
480 mpz_out_str (stdout
, 16, pp_norm
);
482 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
483 mpz_out_str (stdout
, 16, pp_inverted
);
491 printf ("/* This test identifies %.2f%% as non-squares. */\n",
492 (1.0 - total_fraction
) * 100.0);
493 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
494 printf (" do { \\\n");
495 printf (" mp_limb_t r; \\\n");
496 if (mpz_sgn (pp
) != 0)
497 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
499 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
501 for (i
= 0; i
< nfactor
; i
++)
504 printf (" /* %5.2f%% */ \\\n",
505 (1.0 - factor
[i
].fraction
) * 100.0);
507 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
508 factor
[i
].divisor
<= limb_bits
? 1 : 2,
510 mpz_out_str (stdout
, 16, factor
[i
].inverse
);
512 printf (" CNST_LIMB(0x");
514 if ( factor
[i
].divisor
<= limb_bits
)
516 mpz_out_str (stdout
, 16, factor
[i
].mask
);
520 mpz_tdiv_r_2exp (mlo
, factor
[i
].mask
, (unsigned long) limb_bits
);
521 mpz_tdiv_q_2exp (mhi
, factor
[i
].mask
, (unsigned long) limb_bits
);
522 mpz_out_str (stdout
, 16, mhi
);
523 printf ("), CNST_LIMB(0x");
524 mpz_out_str (stdout
, 16, mlo
);
529 printf (" } while (0)\n");
532 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
533 (1.0 - (total_fraction
* 44.0/256.0)) * 100.0);
536 printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
537 printf ("#define PERFSQR_DIVISORS { 256,");
538 for (i
= 0; i
< nfactor
; i
++)
539 printf (" %d,", factor
[i
].divisor
);
548 main (int argc
, char *argv
[])
550 int limb_bits
, nail_bits
;
554 fprintf (stderr
, "Usage: gen-psqr <limbbits> <nailbits>\n");
558 limb_bits
= atoi (argv
[1]);
559 nail_bits
= atoi (argv
[2]);
563 || nail_bits
>= limb_bits
)
565 fprintf (stderr
, "Invalid limb/nail bits: %d %d\n",
566 limb_bits
, nail_bits
);
570 generate_sq_res_0x100 (limb_bits
);
571 generate_mod (limb_bits
, nail_bits
);
573 print (limb_bits
, nail_bits
);