source/libs/gmp/gmp-src/gen-psqr.c

   1 /* Generate perfect square testing data.
   2
   3 Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of either:
   9
  10   * the GNU Lesser General Public License as published by the Free
  11     Software Foundation; either version 3 of the License, or (at your
  12     option) any later version.
  13
  14 or
  15
  16   * the GNU General Public License as published by the Free Software
  17     Foundation; either version 2 of the License, or (at your option) any
  18     later version.
  19
  20 or both in parallel, as here.
  21
  22 The GNU MP Library is distributed in the hope that it will be useful, but
  23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  25 for more details.
  26
  27 You should have received copies of the GNU General Public License and the
  28 GNU Lesser General Public License along with the GNU MP Library.  If not,
  29 see https://www.gnu.org/licenses/.  */
  30
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "bootstrap.c"
  35
  36
  37 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
  38    (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
  39    residues and non-residues modulo small factors of that modulus.
  40
  41    For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
  42    function exists specifically because 2^24-1 and 2^48-1 have nice sets of
  43    prime factors.  For other limb sizes it's considered, but if it doesn't
  44    have good factors then mpn_mod_1 will be used instead.
  45
  46    When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
  47    selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
  48    with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
  49    GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
  50    calculation within a single limb.
  51
  52    In either case primes can be combined to make divisors.  The table data
  53    then effectively indicates remainders which are quadratic residues mod
  54    all the primes.  This sort of combining reduces the number of steps
  55    needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
  56    Nothing is gained or lost in terms of detections, the same total fraction
  57    of non-residues will be identified.
  58
  59    Nothing particularly sophisticated is attempted for combining factors to
  60    make divisors.  This is probably a kind of knapsack problem so it'd be
  61    too hard to attempt anything completely general.  For the usual 32 and 64
  62    bit limbs we get a good enough result just pairing the biggest and
  63    smallest which fit together, repeatedly.
  64
  65    Another aim is to get powerful combinations, ie. divisors which identify
  66    biggest fraction of non-residues, and have those run first.  Again for
  67    the usual 32 and 64 bits it seems good enough just to pair for big
  68    divisors then sort according to the resulting fraction of non-residues
  69    identified.
  70
  71    Also in this program, a table sq_res_0x100 of residues modulo 256 is
  72    generated.  This simply fills bits into limbs of the appropriate
  73    build-time GMP_LIMB_BITS each.
  74
  75 */
  76
  77
  78 /* Normally we aren't using const in gen*.c programs, so as not to have to
  79    bother figuring out if it works, but using it with f_cmp_divisor and
  80    f_cmp_fraction avoids warnings from the qsort calls. */
  81
  82 /* Same tests as gmp.h. */
  83 #if  defined (__STDC__)                                 \
  84   || defined (__cplusplus)                              \
  85   || defined (_AIX)                                     \
  86   || defined (__DECC)                                   \
  87   || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
  88   || defined (_MSC_VER)                                 \
  89   || defined (_WIN32)
  90 #define HAVE_CONST        1
  91 #endif
  92
  93 #if ! HAVE_CONST
  94 #define const
  95 #endif
  96
  97
  98 mpz_t  *sq_res_0x100;          /* table of limbs */
  99 int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
 100 int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
 101 double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
 102
 103 int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
 104 int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
 105 int     max_divisor;       /* all divisors <= max_divisor */
 106 int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
 107 double  total_fraction;    /* of squares */
 108 mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
 109 mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
 110 mpz_t   pp_inverted;       /* invert_limb style inverse */
 111 mpz_t   mod_mask;          /* 2^mod_bits-1 */
 112 char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
 113
 114 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
 115 struct rawfactor_t {
 116   int     divisor;
 117   int     multiplicity;
 118 };
 119 struct rawfactor_t  *rawfactor;
 120 int                 nrawfactor;
 121
 122 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
 123 struct factor_t {
 124   int     divisor;
 125   mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
 126   mpz_t   mask;      /* indicating squares mod divisor */
 127   double  fraction;  /* squares/total */
 128 };
 129 struct factor_t  *factor;
 130 int              nfactor;       /* entries in use in factor array */
 131 int              factor_alloc;  /* entries allocated to factor array */
 132
 133
 134 int
 135 f_cmp_divisor (const void *parg, const void *qarg)
 136 {
 137   const struct factor_t *p, *q;
 138   p = (const struct factor_t *) parg;
 139   q = (const struct factor_t *) qarg;
 140   if (p->divisor > q->divisor)
 141     return 1;
 142   else if (p->divisor < q->divisor)
 143     return -1;
 144   else
 145     return 0;
 146 }
 147
 148 int
 149 f_cmp_fraction (const void *parg, const void *qarg)
 150 {
 151   const struct factor_t *p, *q;
 152   p = (const struct factor_t *) parg;
 153   q = (const struct factor_t *) qarg;
 154   if (p->fraction > q->fraction)
 155     return 1;
 156   else if (p->fraction < q->fraction)
 157     return -1;
 158   else
 159     return 0;
 160 }
 161
 162 /* Remove array[idx] by copying the remainder down, and adjust narray
 163    accordingly.  */
 164 #define COLLAPSE_ELEMENT(array, idx, narray)                    \
 165   do {                                                          \
 166     memmove (&(array)[idx],                                     \
 167              &(array)[idx+1],                                   \
 168              ((narray)-((idx)+1)) * sizeof (array[0]));         \
 169     (narray)--;                                                 \
 170   } while (0)
 171
 172
 173 /* return n*2^p mod m */
 174 int
 175 mul_2exp_mod (int n, int p, int m)
 176 {
 177   while (--p >= 0)
 178     n = (2 * n) % m;
 179   return n;
 180 }
 181
 182 /* return -n mod m */
 183 int
 184 neg_mod (int n, int m)
 185 {
 186   assert (n >= 0 && n < m);
 187   return (n == 0 ? 0 : m-n);
 188 }
 189
 190 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
 191    "-(idx<<mod_bits)" can be a square modulo m.  */
 192 void
 193 square_mask (mpz_t mask, int m)
 194 {
 195   int    p, i, r, idx;
 196
 197   p = mul_2exp_mod (1, mod_bits, m);
 198   p = neg_mod (p, m);
 199
 200   mpz_set_ui (mask, 0L);
 201   for (i = 0; i < m; i++)
 202     {
 203       r = (i * i) % m;
 204       idx = (r * p) % m;
 205       mpz_setbit (mask, (unsigned long) idx);
 206     }
 207 }
 208
 209 void
 210 generate_sq_res_0x100 (int limb_bits)
 211 {
 212   int  i, res;
 213
 214   nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
 215   sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
 216
 217   for (i = 0; i < nsq_res_0x100; i++)
 218     mpz_init_set_ui (sq_res_0x100[i], 0L);
 219
 220   for (i = 0; i < 0x100; i++)
 221     {
 222       res = (i * i) % 0x100;
 223       mpz_setbit (sq_res_0x100[res / limb_bits],
 224                   (unsigned long) (res % limb_bits));
 225     }
 226
 227   sq_res_0x100_num = 0;
 228   for (i = 0; i < nsq_res_0x100; i++)
 229     sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
 230   sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
 231 }
 232
 233 void
 234 generate_mod (int limb_bits, int nail_bits)
 235 {
 236   int    numb_bits = limb_bits - nail_bits;
 237   int    i, divisor;
 238
 239   mpz_init_set_ui (pp, 0L);
 240   mpz_init_set_ui (pp_norm, 0L);
 241   mpz_init_set_ui (pp_inverted, 0L);
 242
 243   /* no more than limb_bits many factors in a one limb modulus (and of
 244      course in reality nothing like that many) */
 245   factor_alloc = limb_bits;
 246   factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
 247   rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
 248
 249   if (numb_bits % 4 != 0)
 250     {
 251       strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
 252       goto use_pp;
 253     }
 254
 255   max_divisor = 2*limb_bits;
 256   max_divisor_bits = log2_ceil (max_divisor);
 257
 258   if (numb_bits / 4 < max_divisor_bits)
 259     {
 260       /* Wind back to one limb worth of max_divisor, if that will let us use
 261          mpn_mod_34lsub1.  */
 262       max_divisor = limb_bits;
 263       max_divisor_bits = log2_ceil (max_divisor);
 264
 265       if (numb_bits / 4 < max_divisor_bits)
 266         {
 267           strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
 268           goto use_pp;
 269         }
 270     }
 271
 272   {
 273     /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
 274     mpz_t  m, q, r;
 275     int    multiplicity;
 276
 277     mod34_bits = (numb_bits / 4) * 3;
 278
 279     /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
 280        the mod34_bits mark, adding the two halves for a remainder of at most
 281        mod34_bits+1 many bits */
 282     mod_bits = mod34_bits + 1;
 283
 284     mpz_init_set_ui (m, 1L);
 285     mpz_mul_2exp (m, m, mod34_bits);
 286     mpz_sub_ui (m, m, 1L);
 287
 288     mpz_init (q);
 289     mpz_init (r);
 290
 291     for (i = 3; i <= max_divisor; i+=2)
 292       {
 293         if (! isprime (i))
 294           continue;
 295
 296         mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
 297         if (mpz_sgn (r) != 0)
 298           continue;
 299
 300         /* if a repeated prime is found it's used as an i^n in one factor */
 301         divisor = 1;
 302         multiplicity = 0;
 303         do
 304           {
 305             if (divisor > max_divisor / i)
 306               break;
 307             multiplicity++;
 308             mpz_set (m, q);
 309             mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
 310           }
 311         while (mpz_sgn (r) == 0);
 312
 313         assert (nrawfactor < factor_alloc);
 314         rawfactor[nrawfactor].divisor = i;
 315         rawfactor[nrawfactor].multiplicity = multiplicity;
 316         nrawfactor++;
 317       }
 318
 319     mpz_clear (m);
 320     mpz_clear (q);
 321     mpz_clear (r);
 322   }
 323
 324   if (nrawfactor <= 2)
 325     {
 326       mpz_t  new_pp;
 327
 328       sprintf (mod34_excuse, "only %d small factor%s",
 329                nrawfactor, nrawfactor == 1 ? "" : "s");
 330
 331     use_pp:
 332       /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
 333          tried with just one */
 334       max_divisor = 2*limb_bits;
 335       max_divisor_bits = log2_ceil (max_divisor);
 336
 337       mpz_init (new_pp);
 338       nrawfactor = 0;
 339       mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
 340
 341       /* one copy of each small prime */
 342       mpz_set_ui (pp, 1L);
 343       for (i = 3; i <= max_divisor; i+=2)
 344         {
 345           if (! isprime (i))
 346             continue;
 347
 348           mpz_mul_ui (new_pp, pp, (unsigned long) i);
 349           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
 350             break;
 351           mpz_set (pp, new_pp);
 352
 353           assert (nrawfactor < factor_alloc);
 354           rawfactor[nrawfactor].divisor = i;
 355           rawfactor[nrawfactor].multiplicity = 1;
 356           nrawfactor++;
 357         }
 358
 359       /* Plus an extra copy of one or more of the primes selected, if that
 360          still fits in max_divisor and the total in mod_bits.  Usually only
 361          3 or 5 will be candidates */
 362       for (i = nrawfactor-1; i >= 0; i--)
 363         {
 364           if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
 365             continue;
 366           mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
 367           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
 368             continue;
 369           mpz_set (pp, new_pp);
 370
 371           rawfactor[i].multiplicity++;
 372         }
 373
 374       mod_bits = mpz_sizeinbase (pp, 2);
 375
 376       mpz_set (pp_norm, pp);
 377       while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
 378         mpz_add (pp_norm, pp_norm, pp_norm);
 379
 380       mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
 381
 382       mpz_clear (new_pp);
 383     }
 384
 385   /* start the factor array */
 386   for (i = 0; i < nrawfactor; i++)
 387     {
 388       int  j;
 389       assert (nfactor < factor_alloc);
 390       factor[nfactor].divisor = 1;
 391       for (j = 0; j < rawfactor[i].multiplicity; j++)
 392         factor[nfactor].divisor *= rawfactor[i].divisor;
 393       nfactor++;
 394     }
 395
 396  combine:
 397   /* Combine entries in the factor array.  Combine the smallest entry with
 398      the biggest one that will fit with it (ie. under max_divisor), then
 399      repeat that with the new smallest entry. */
 400   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
 401   for (i = nfactor-1; i >= 1; i--)
 402     {
 403       if (factor[i].divisor <= max_divisor / factor[0].divisor)
 404         {
 405           factor[0].divisor *= factor[i].divisor;
 406           COLLAPSE_ELEMENT (factor, i, nfactor);
 407           goto combine;
 408         }
 409     }
 410
 411   total_fraction = 1.0;
 412   for (i = 0; i < nfactor; i++)
 413     {
 414       mpz_init (factor[i].inverse);
 415       mpz_invert_ui_2exp (factor[i].inverse,
 416                           (unsigned long) factor[i].divisor,
 417                           (unsigned long) mod_bits);
 418
 419       mpz_init (factor[i].mask);
 420       square_mask (factor[i].mask, factor[i].divisor);
 421
 422       /* fraction of possible squares */
 423       factor[i].fraction = (double) mpz_popcount (factor[i].mask)
 424         / factor[i].divisor;
 425
 426       /* total fraction of possible squares */
 427       total_fraction *= factor[i].fraction;
 428     }
 429
 430   /* best tests first (ie. smallest fraction) */
 431   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
 432 }
 433
 434 void
 435 print (int limb_bits, int nail_bits)
 436 {
 437   int    i;
 438   mpz_t  mhi, mlo;
 439
 440   printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
 441   printf ("\n");
 442
 443   printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
 444           limb_bits, nail_bits);
 445   printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
 446           limb_bits, nail_bits);
 447   printf ("#endif\n");
 448   printf ("\n");
 449
 450   printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
 451   printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
 452           (1.0 - sq_res_0x100_fraction) * 100.0,
 453           0x100 - sq_res_0x100_num);
 454   printf ("static const mp_limb_t\n");
 455   printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
 456   for (i = 0; i < nsq_res_0x100; i++)
 457     {
 458       printf ("  CNST_LIMB(0x");
 459       mpz_out_str (stdout, 16, sq_res_0x100[i]);
 460       printf ("),\n");
 461     }
 462   printf ("};\n");
 463   printf ("\n");
 464
 465   if (mpz_sgn (pp) != 0)
 466     {
 467       printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
 468       printf ("/* PERFSQR_PP = ");
 469     }
 470   else
 471     printf ("/* 2^%d-1 = ", mod34_bits);
 472   for (i = 0; i < nrawfactor; i++)
 473     {
 474       if (i != 0)
 475         printf (" * ");
 476       printf ("%d", rawfactor[i].divisor);
 477       if (rawfactor[i].multiplicity != 1)
 478         printf ("^%d", rawfactor[i].multiplicity);
 479     }
 480   printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
 481
 482   printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
 483   if (mpz_sgn (pp) != 0)
 484     {
 485       printf ("#define PERFSQR_PP            CNST_LIMB(0x");
 486       mpz_out_str (stdout, 16, pp);
 487       printf (")\n");
 488       printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
 489       mpz_out_str (stdout, 16, pp_norm);
 490       printf (")\n");
 491       printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
 492       mpz_out_str (stdout, 16, pp_inverted);
 493       printf (")\n");
 494     }
 495   printf ("\n");
 496
 497   mpz_init (mhi);
 498   mpz_init (mlo);
 499
 500   printf ("/* This test identifies %.2f%% as non-squares. */\n",
 501           (1.0 - total_fraction) * 100.0);
 502   printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
 503   printf ("  do {                              \\\n");
 504   printf ("    mp_limb_t  r;                   \\\n");
 505   if (mpz_sgn (pp) != 0)
 506     printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
 507   else
 508     printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
 509
 510   for (i = 0; i < nfactor; i++)
 511     {
 512       printf ("                                    \\\n");
 513       printf ("    /* %5.2f%% */                    \\\n",
 514               (1.0 - factor[i].fraction) * 100.0);
 515
 516       printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
 517               factor[i].divisor <= limb_bits ? 1 : 2,
 518               factor[i].divisor);
 519       mpz_out_str (stdout, 16, factor[i].inverse);
 520       printf ("), \\\n");
 521       printf ("                   CNST_LIMB(0x");
 522
 523       if ( factor[i].divisor <= limb_bits)
 524         {
 525           mpz_out_str (stdout, 16, factor[i].mask);
 526         }
 527       else
 528         {
 529           mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
 530           mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
 531           mpz_out_str (stdout, 16, mhi);
 532           printf ("), CNST_LIMB(0x");
 533           mpz_out_str (stdout, 16, mlo);
 534         }
 535       printf (")); \\\n");
 536     }
 537
 538   printf ("  } while (0)\n");
 539   printf ("\n");
 540
 541   printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
 542           (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
 543   printf ("\n");
 544
 545   printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
 546   printf ("#define PERFSQR_DIVISORS  { 256,");
 547   for (i = 0; i < nfactor; i++)
 548       printf (" %d,", factor[i].divisor);
 549   printf (" }\n");
 550
 551
 552   mpz_clear (mhi);
 553   mpz_clear (mlo);
 554 }
 555
 556 int
 557 main (int argc, char *argv[])
 558 {
 559   int  limb_bits, nail_bits;
 560
 561   if (argc != 3)
 562     {
 563       fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
 564       exit (1);
 565     }
 566
 567   limb_bits = atoi (argv[1]);
 568   nail_bits = atoi (argv[2]);
 569
 570   if (limb_bits <= 0
 571       || nail_bits < 0
 572       || nail_bits >= limb_bits)
 573     {
 574       fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
 575                limb_bits, nail_bits);
 576       exit (1);
 577     }
 578
 579   generate_sq_res_0x100 (limb_bits);
 580   generate_mod (limb_bits, nail_bits);
 581
 582   print (limb_bits, nail_bits);
 583
 584   return 0;
 585 }