release/src/router/gmp/gen-psqr.c

   1 /* Generate perfect square testing data.
   2
   3 Copyright 2002-2004, 2012 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of either:
   9
  10   * the GNU Lesser General Public License as published by the Free
  11     Software Foundation; either version 3 of the License, or (at your
  12     option) any later version.
  13
  14 or
  15
  16   * the GNU General Public License as published by the Free Software
  17     Foundation; either version 2 of the License, or (at your option) any
  18     later version.
  19
  20 or both in parallel, as here.
  21
  22 The GNU MP Library is distributed in the hope that it will be useful, but
  23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  25 for more details.
  26
  27 You should have received copies of the GNU General Public License and the
  28 GNU Lesser General Public License along with the GNU MP Library.  If not,
  29 see https://www.gnu.org/licenses/.  */
  30
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33
  34 #include "bootstrap.c"
  35
  36
  37 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
  38    (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
  39    residues and non-residues modulo small factors of that modulus.
  40
  41    For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
  42    function exists specifically because 2^24-1 and 2^48-1 have nice sets of
  43    prime factors.  For other limb sizes it's considered, but if it doesn't
  44    have good factors then mpn_mod_1 will be used instead.
  45
  46    When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
  47    selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
  48    with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
  49    GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
  50    calculation within a single limb.
  51
  52    In either case primes can be combined to make divisors.  The table data
  53    then effectively indicates remainders which are quadratic residues mod
  54    all the primes.  This sort of combining reduces the number of steps
  55    needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
  56    Nothing is gained or lost in terms of detections, the same total fraction
  57    of non-residues will be identified.
  58
  59    Nothing particularly sophisticated is attempted for combining factors to
  60    make divisors.  This is probably a kind of knapsack problem so it'd be
  61    too hard to attempt anything completely general.  For the usual 32 and 64
  62    bit limbs we get a good enough result just pairing the biggest and
  63    smallest which fit together, repeatedly.
  64
  65    Another aim is to get powerful combinations, ie. divisors which identify
  66    biggest fraction of non-residues, and have those run first.  Again for
  67    the usual 32 and 64 bits it seems good enough just to pair for big
  68    divisors then sort according to the resulting fraction of non-residues
  69    identified.
  70
  71    Also in this program, a table sq_res_0x100 of residues modulo 256 is
  72    generated.  This simply fills bits into limbs of the appropriate
  73    build-time GMP_LIMB_BITS each.
  74
  75 */
  76
  77
  78 /* Normally we aren't using const in gen*.c programs, so as not to have to
  79    bother figuring out if it works, but using it with f_cmp_divisor and
  80    f_cmp_fraction avoids warnings from the qsort calls. */
  81
  82 /* Same tests as gmp.h. */
  83 #if  defined (__STDC__)                                 \
  84   || defined (__cplusplus)                              \
  85   || defined (_AIX)                                     \
  86   || defined (__DECC)                                   \
  87   || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
  88   || defined (_MSC_VER)                                 \
  89   || defined (_WIN32)
  90 #define HAVE_CONST        1
  91 #endif
  92
  93 #if ! HAVE_CONST
  94 #define const
  95 #endif
  96
  97
  98 mpz_t  *sq_res_0x100;          /* table of limbs */
  99 int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
 100 int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
 101 double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
 102
 103 int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
 104 int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
 105 int     max_divisor;       /* all divisors <= max_divisor */
 106 int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
 107 double  total_fraction;    /* of squares */
 108 mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
 109 mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
 110 mpz_t   pp_inverted;       /* invert_limb style inverse */
 111 mpz_t   mod_mask;          /* 2^mod_bits-1 */
 112 char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
 113
 114 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
 115 struct rawfactor_t {
 116   int     divisor;
 117   int     multiplicity;
 118 };
 119 struct rawfactor_t  *rawfactor;
 120 int                 nrawfactor;
 121
 122 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
 123 struct factor_t {
 124   int     divisor;
 125   mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
 126   mpz_t   mask;      /* indicating squares mod divisor */
 127   double  fraction;  /* squares/total */
 128 };
 129 struct factor_t  *factor;
 130 int              nfactor;       /* entries in use in factor array */
 131 int              factor_alloc;  /* entries allocated to factor array */
 132
 133
 134 int
 135 f_cmp_divisor (const void *parg, const void *qarg)
 136 {
 137   const struct factor_t *p, *q;
 138   p = parg;
 139   q = qarg;
 140   if (p->divisor > q->divisor)
 141     return 1;
 142   else if (p->divisor < q->divisor)
 143     return -1;
 144   else
 145     return 0;
 146 }
 147
 148 int
 149 f_cmp_fraction (const void *parg, const void *qarg)
 150 {
 151   const struct factor_t *p, *q;
 152   p = parg;
 153   q = qarg;
 154   if (p->fraction > q->fraction)
 155     return 1;
 156   else if (p->fraction < q->fraction)
 157     return -1;
 158   else
 159     return 0;
 160 }
 161
 162 /* Remove array[idx] by copying the remainder down, and adjust narray
 163    accordingly.  */
 164 #define COLLAPSE_ELEMENT(array, idx, narray)                    \
 165   do {                                                          \
 166     memmove (&(array)[idx],                                     \
 167              &(array)[idx+1],                                   \
 168              ((narray)-((idx)+1)) * sizeof (array[0]));         \
 169     (narray)--;                                                 \
 170   } while (0)
 171
 172
 173 /* return n*2^p mod m */
 174 int
 175 mul_2exp_mod (int n, int p, int m)
 176 {
 177   int  i;
 178   for (i = 0; i < p; i++)
 179     n = (2 * n) % m;
 180   return n;
 181 }
 182
 183 /* return -n mod m */
 184 int
 185 neg_mod (int n, int m)
 186 {
 187   assert (n >= 0 && n < m);
 188   return (n == 0 ? 0 : m-n);
 189 }
 190
 191 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
 192    "-(idx<<mod_bits)" can be a square modulo m.  */
 193 void
 194 square_mask (mpz_t mask, int m)
 195 {
 196   int    p, i, r, idx;
 197
 198   p = mul_2exp_mod (1, mod_bits, m);
 199   p = neg_mod (p, m);
 200
 201   mpz_set_ui (mask, 0L);
 202   for (i = 0; i < m; i++)
 203     {
 204       r = (i * i) % m;
 205       idx = (r * p) % m;
 206       mpz_setbit (mask, (unsigned long) idx);
 207     }
 208 }
 209
 210 void
 211 generate_sq_res_0x100 (int limb_bits)
 212 {
 213   int  i, res;
 214
 215   nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
 216   sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
 217
 218   for (i = 0; i < nsq_res_0x100; i++)
 219     mpz_init_set_ui (sq_res_0x100[i], 0L);
 220
 221   for (i = 0; i < 0x100; i++)
 222     {
 223       res = (i * i) % 0x100;
 224       mpz_setbit (sq_res_0x100[res / limb_bits],
 225                   (unsigned long) (res % limb_bits));
 226     }
 227
 228   sq_res_0x100_num = 0;
 229   for (i = 0; i < nsq_res_0x100; i++)
 230     sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
 231   sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
 232 }
 233
 234 void
 235 generate_mod (int limb_bits, int nail_bits)
 236 {
 237   int    numb_bits = limb_bits - nail_bits;
 238   int    i, divisor;
 239
 240   mpz_init_set_ui (pp, 0L);
 241   mpz_init_set_ui (pp_norm, 0L);
 242   mpz_init_set_ui (pp_inverted, 0L);
 243
 244   /* no more than limb_bits many factors in a one limb modulus (and of
 245      course in reality nothing like that many) */
 246   factor_alloc = limb_bits;
 247   factor = xmalloc (factor_alloc * sizeof (*factor));
 248   rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor));
 249
 250   if (numb_bits % 4 != 0)
 251     {
 252       strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
 253       goto use_pp;
 254     }
 255
 256   max_divisor = 2*limb_bits;
 257   max_divisor_bits = log2_ceil (max_divisor);
 258
 259   if (numb_bits / 4 < max_divisor_bits)
 260     {
 261       /* Wind back to one limb worth of max_divisor, if that will let us use
 262          mpn_mod_34lsub1.  */
 263       max_divisor = limb_bits;
 264       max_divisor_bits = log2_ceil (max_divisor);
 265
 266       if (numb_bits / 4 < max_divisor_bits)
 267         {
 268           strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
 269           goto use_pp;
 270         }
 271     }
 272
 273   {
 274     /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
 275     mpz_t  m, q, r;
 276     int    multiplicity;
 277
 278     mod34_bits = (numb_bits / 4) * 3;
 279
 280     /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
 281        the mod34_bits mark, adding the two halves for a remainder of at most
 282        mod34_bits+1 many bits */
 283     mod_bits = mod34_bits + 1;
 284
 285     mpz_init_set_ui (m, 1L);
 286     mpz_mul_2exp (m, m, mod34_bits);
 287     mpz_sub_ui (m, m, 1L);
 288
 289     mpz_init (q);
 290     mpz_init (r);
 291
 292     for (i = 3; i <= max_divisor; i++)
 293       {
 294         if (! isprime (i))
 295           continue;
 296
 297         mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
 298         if (mpz_sgn (r) != 0)
 299           continue;
 300
 301         /* if a repeated prime is found it's used as an i^n in one factor */
 302         divisor = 1;
 303         multiplicity = 0;
 304         do
 305           {
 306             if (divisor > max_divisor / i)
 307               break;
 308             multiplicity++;
 309             mpz_set (m, q);
 310             mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
 311           }
 312         while (mpz_sgn (r) == 0);
 313
 314         assert (nrawfactor < factor_alloc);
 315         rawfactor[nrawfactor].divisor = i;
 316         rawfactor[nrawfactor].multiplicity = multiplicity;
 317         nrawfactor++;
 318       }
 319
 320     mpz_clear (m);
 321     mpz_clear (q);
 322     mpz_clear (r);
 323   }
 324
 325   if (nrawfactor <= 2)
 326     {
 327       mpz_t  new_pp;
 328
 329       sprintf (mod34_excuse, "only %d small factor%s",
 330                nrawfactor, nrawfactor == 1 ? "" : "s");
 331
 332     use_pp:
 333       /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
 334          tried with just one */
 335       max_divisor = 2*limb_bits;
 336       max_divisor_bits = log2_ceil (max_divisor);
 337
 338       mpz_init (new_pp);
 339       nrawfactor = 0;
 340       mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
 341
 342       /* one copy of each small prime */
 343       mpz_set_ui (pp, 1L);
 344       for (i = 3; i <= max_divisor; i++)
 345         {
 346           if (! isprime (i))
 347             continue;
 348
 349           mpz_mul_ui (new_pp, pp, (unsigned long) i);
 350           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
 351             break;
 352           mpz_set (pp, new_pp);
 353
 354           assert (nrawfactor < factor_alloc);
 355           rawfactor[nrawfactor].divisor = i;
 356           rawfactor[nrawfactor].multiplicity = 1;
 357           nrawfactor++;
 358         }
 359
 360       /* Plus an extra copy of one or more of the primes selected, if that
 361          still fits in max_divisor and the total in mod_bits.  Usually only
 362          3 or 5 will be candidates */
 363       for (i = nrawfactor-1; i >= 0; i--)
 364         {
 365           if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
 366             continue;
 367           mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
 368           if (mpz_sizeinbase (new_pp, 2) > mod_bits)
 369             continue;
 370           mpz_set (pp, new_pp);
 371
 372           rawfactor[i].multiplicity++;
 373         }
 374
 375       mod_bits = mpz_sizeinbase (pp, 2);
 376
 377       mpz_set (pp_norm, pp);
 378       while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
 379         mpz_add (pp_norm, pp_norm, pp_norm);
 380
 381       mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
 382
 383       mpz_clear (new_pp);
 384     }
 385
 386   /* start the factor array */
 387   for (i = 0; i < nrawfactor; i++)
 388     {
 389       int  j;
 390       assert (nfactor < factor_alloc);
 391       factor[nfactor].divisor = 1;
 392       for (j = 0; j < rawfactor[i].multiplicity; j++)
 393         factor[nfactor].divisor *= rawfactor[i].divisor;
 394       nfactor++;
 395     }
 396
 397  combine:
 398   /* Combine entries in the factor array.  Combine the smallest entry with
 399      the biggest one that will fit with it (ie. under max_divisor), then
 400      repeat that with the new smallest entry. */
 401   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
 402   for (i = nfactor-1; i >= 1; i--)
 403     {
 404       if (factor[i].divisor <= max_divisor / factor[0].divisor)
 405         {
 406           factor[0].divisor *= factor[i].divisor;
 407           COLLAPSE_ELEMENT (factor, i, nfactor);
 408           goto combine;
 409         }
 410     }
 411
 412   total_fraction = 1.0;
 413   for (i = 0; i < nfactor; i++)
 414     {
 415       mpz_init (factor[i].inverse);
 416       mpz_invert_ui_2exp (factor[i].inverse,
 417                           (unsigned long) factor[i].divisor,
 418                           (unsigned long) mod_bits);
 419
 420       mpz_init (factor[i].mask);
 421       square_mask (factor[i].mask, factor[i].divisor);
 422
 423       /* fraction of possible squares */
 424       factor[i].fraction = (double) mpz_popcount (factor[i].mask)
 425         / factor[i].divisor;
 426
 427       /* total fraction of possible squares */
 428       total_fraction *= factor[i].fraction;
 429     }
 430
 431   /* best tests first (ie. smallest fraction) */
 432   qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
 433 }
 434
 435 void
 436 print (int limb_bits, int nail_bits)
 437 {
 438   int    i;
 439   mpz_t  mhi, mlo;
 440
 441   printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
 442   printf ("\n");
 443
 444   printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
 445           limb_bits, nail_bits);
 446   printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
 447           limb_bits, nail_bits);
 448   printf ("#endif\n");
 449   printf ("\n");
 450
 451   printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
 452   printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
 453           (1.0 - sq_res_0x100_fraction) * 100.0,
 454           0x100 - sq_res_0x100_num);
 455   printf ("static const mp_limb_t\n");
 456   printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
 457   for (i = 0; i < nsq_res_0x100; i++)
 458     {
 459       printf ("  CNST_LIMB(0x");
 460       mpz_out_str (stdout, 16, sq_res_0x100[i]);
 461       printf ("),\n");
 462     }
 463   printf ("};\n");
 464   printf ("\n");
 465
 466   if (mpz_sgn (pp) != 0)
 467     {
 468       printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
 469       printf ("/* PERFSQR_PP = ");
 470     }
 471   else
 472     printf ("/* 2^%d-1 = ", mod34_bits);
 473   for (i = 0; i < nrawfactor; i++)
 474     {
 475       if (i != 0)
 476         printf (" * ");
 477       printf ("%d", rawfactor[i].divisor);
 478       if (rawfactor[i].multiplicity != 1)
 479         printf ("^%d", rawfactor[i].multiplicity);
 480     }
 481   printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
 482
 483   printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
 484   if (mpz_sgn (pp) != 0)
 485     {
 486       printf ("#define PERFSQR_PP            CNST_LIMB(0x");
 487       mpz_out_str (stdout, 16, pp);
 488       printf (")\n");
 489       printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
 490       mpz_out_str (stdout, 16, pp_norm);
 491       printf (")\n");
 492       printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
 493       mpz_out_str (stdout, 16, pp_inverted);
 494       printf (")\n");
 495     }
 496   printf ("\n");
 497
 498   mpz_init (mhi);
 499   mpz_init (mlo);
 500
 501   printf ("/* This test identifies %.2f%% as non-squares. */\n",
 502           (1.0 - total_fraction) * 100.0);
 503   printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
 504   printf ("  do {                              \\\n");
 505   printf ("    mp_limb_t  r;                   \\\n");
 506   if (mpz_sgn (pp) != 0)
 507     printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
 508   else
 509     printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
 510
 511   for (i = 0; i < nfactor; i++)
 512     {
 513       printf ("                                    \\\n");
 514       printf ("    /* %5.2f%% */                    \\\n",
 515               (1.0 - factor[i].fraction) * 100.0);
 516
 517       printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
 518               factor[i].divisor <= limb_bits ? 1 : 2,
 519               factor[i].divisor);
 520       mpz_out_str (stdout, 16, factor[i].inverse);
 521       printf ("), \\\n");
 522       printf ("                   CNST_LIMB(0x");
 523
 524       if ( factor[i].divisor <= limb_bits)
 525         {
 526           mpz_out_str (stdout, 16, factor[i].mask);
 527         }
 528       else
 529         {
 530           mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
 531           mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
 532           mpz_out_str (stdout, 16, mhi);
 533           printf ("), CNST_LIMB(0x");
 534           mpz_out_str (stdout, 16, mlo);
 535         }
 536       printf (")); \\\n");
 537     }
 538
 539   printf ("  } while (0)\n");
 540   printf ("\n");
 541
 542   printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
 543           (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
 544   printf ("\n");
 545
 546   printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
 547   printf ("#define PERFSQR_DIVISORS  { 256,");
 548   for (i = 0; i < nfactor; i++)
 549       printf (" %d,", factor[i].divisor);
 550   printf (" }\n");
 551
 552
 553   mpz_clear (mhi);
 554   mpz_clear (mlo);
 555 }
 556
 557 int
 558 main (int argc, char *argv[])
 559 {
 560   int  limb_bits, nail_bits;
 561
 562   if (argc != 3)
 563     {
 564       fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
 565       exit (1);
 566     }
 567
 568   limb_bits = atoi (argv[1]);
 569   nail_bits = atoi (argv[2]);
 570
 571   if (limb_bits <= 0
 572       || nail_bits < 0
 573       || nail_bits >= limb_bits)
 574     {
 575       fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
 576                limb_bits, nail_bits);
 577       exit (1);
 578     }
 579
 580   generate_sq_res_0x100 (limb_bits);
 581   generate_mod (limb_bits, nail_bits);
 582
 583   print (limb_bits, nail_bits);
 584
 585   return 0;
 586 }