source/libs/gmp/gmp-src/mpn/generic/perfpow.c

   1 /* mpn_perfect_power_p -- mpn perfect power detection.
   2
   3    Contributed to the GNU project by Martin Boij.
   4
   5 Copyright 2009, 2010, 2012, 2014 Free Software Foundation, Inc.
   6
   7 This file is part of the GNU MP Library.
   8
   9 The GNU MP Library is free software; you can redistribute it and/or modify
  10 it under the terms of either:
  11
  12   * the GNU Lesser General Public License as published by the Free
  13     Software Foundation; either version 3 of the License, or (at your
  14     option) any later version.
  15
  16 or
  17
  18   * the GNU General Public License as published by the Free Software
  19     Foundation; either version 2 of the License, or (at your option) any
  20     later version.
  21
  22 or both in parallel, as here.
  23
  24 The GNU MP Library is distributed in the hope that it will be useful, but
  25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  26 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  27 for more details.
  28
  29 You should have received copies of the GNU General Public License and the
  30 GNU Lesser General Public License along with the GNU MP Library.  If not,
  31 see https://www.gnu.org/licenses/.  */
  32
  33 #include "gmp.h"
  34 #include "gmp-impl.h"
  35 #include "longlong.h"
  36
  37 #define SMALL 20
  38 #define MEDIUM 100
  39
  40 /* Return non-zero if {np,nn} == {xp,xn} ^ k.
  41    Algorithm:
  42        For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
  43        {xp,xn}^k. Stop if they don't match the s least significant limbs of
  44        {np,nn}.
  45
  46    FIXME: Low xn limbs can be expected to always match, if computed as a mod
  47    B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
  48    most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
  49    compare to {np, nn}. Or use an even cruder approximation based on fix-point
  50    base 2 logarithm.  */
  51 static int
  52 pow_equals (mp_srcptr np, mp_size_t n,
  53             mp_srcptr xp,mp_size_t xn,
  54             mp_limb_t k, mp_bitcnt_t f,
  55             mp_ptr tp)
  56 {
  57   mp_bitcnt_t y, z;
  58   mp_size_t bn;
  59   mp_limb_t h, l;
  60
  61   ASSERT (n > 1 || (n == 1 && np[0] > 1));
  62   ASSERT (np[n - 1] > 0);
  63   ASSERT (xn > 0);
  64
  65   if (xn == 1 && xp[0] == 1)
  66     return 0;
  67
  68   z = 1 + (n >> 1);
  69   for (bn = 1; bn < z; bn <<= 1)
  70     {
  71       mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
  72       if (mpn_cmp (tp, np, bn) != 0)
  73         return 0;
  74     }
  75
  76   /* Final check. Estimate the size of {xp,xn}^k before computing the power
  77      with full precision.  Optimization: It might pay off to make a more
  78      accurate estimation of the logarithm of {xp,xn}, rather than using the
  79      index of the MSB.  */
  80
  81   MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
  82   y -= 1;  /* msb_index (xp, xn) */
  83
  84   umul_ppmm (h, l, k, y);
  85   h -= l == 0;  --l;    /* two-limb decrement */
  86
  87   z = f - 1; /* msb_index (np, n) */
  88   if (h == 0 && l <= z)
  89     {
  90       mp_limb_t *tp2;
  91       mp_size_t i;
  92       int ans;
  93       mp_limb_t size;
  94       TMP_DECL;
  95
  96       size = l + k;
  97       ASSERT_ALWAYS (size >= k);
  98
  99       TMP_MARK;
 100       y = 2 + size / GMP_LIMB_BITS;
 101       tp2 = TMP_ALLOC_LIMBS (y);
 102
 103       i = mpn_pow_1 (tp, xp, xn, k, tp2);
 104       if (i == n && mpn_cmp (tp, np, n) == 0)
 105         ans = 1;
 106       else
 107         ans = 0;
 108       TMP_FREE;
 109       return ans;
 110     }
 111
 112   return 0;
 113 }
 114
 115
 116 /* Return non-zero if N = {np,n} is a kth power.
 117    I = {ip,n} = N^(-1) mod B^n.  */
 118 static int
 119 is_kth_power (mp_ptr rp, mp_srcptr np,
 120               mp_limb_t k, mp_srcptr ip,
 121               mp_size_t n, mp_bitcnt_t f,
 122               mp_ptr tp)
 123 {
 124   mp_bitcnt_t b;
 125   mp_size_t rn, xn;
 126
 127   ASSERT (n > 0);
 128   ASSERT ((k & 1) != 0 || k == 2);
 129   ASSERT ((np[0] & 1) != 0);
 130
 131   if (k == 2)
 132     {
 133       b = (f + 1) >> 1;
 134       rn = 1 + b / GMP_LIMB_BITS;
 135       if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
 136         {
 137           rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
 138           xn = rn;
 139           MPN_NORMALIZE (rp, xn);
 140           if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
 141             return 1;
 142
 143           /* Check if (2^b - r)^2 == n */
 144           mpn_neg (rp, rp, rn);
 145           rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
 146           MPN_NORMALIZE (rp, rn);
 147           if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
 148             return 1;
 149         }
 150     }
 151   else
 152     {
 153       b = 1 + (f - 1) / k;
 154       rn = 1 + (b - 1) / GMP_LIMB_BITS;
 155       mpn_brootinv (rp, ip, rn, k, tp);
 156       if ((b % GMP_LIMB_BITS) != 0)
 157         rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
 158       MPN_NORMALIZE (rp, rn);
 159       if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
 160         return 1;
 161     }
 162   MPN_ZERO (rp, rn); /* Untrash rp */
 163   return 0;
 164 }
 165
 166 static int
 167 perfpow (mp_srcptr np, mp_size_t n,
 168          mp_limb_t ub, mp_limb_t g,
 169          mp_bitcnt_t f, int neg)
 170 {
 171   mp_ptr ip, tp, rp;
 172   mp_limb_t k;
 173   int ans;
 174   mp_bitcnt_t b;
 175   gmp_primesieve_t ps;
 176   TMP_DECL;
 177
 178   ASSERT (n > 0);
 179   ASSERT ((np[0] & 1) != 0);
 180   ASSERT (ub > 0);
 181
 182   TMP_MARK;
 183   gmp_init_primesieve (&ps);
 184   b = (f + 3) >> 1;
 185
 186   TMP_ALLOC_LIMBS_3 (ip, n, rp, n, tp, 5 * n);
 187
 188   MPN_ZERO (rp, n);
 189
 190   /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
 191      roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
 192      similarly for nth roots. It should be more efficient to compute n^{1/2} as
 193      n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
 194      similar for kth roots if we switch to an iteration converging to n^{1/k -
 195      1}, and we can then eliminate this binvert call. */
 196   mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
 197   if (b % GMP_LIMB_BITS)
 198     ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
 199
 200   if (neg)
 201     gmp_nextprime (&ps);
 202
 203   ans = 0;
 204   if (g > 0)
 205     {
 206       ub = MIN (ub, g + 1);
 207       while ((k = gmp_nextprime (&ps)) < ub)
 208         {
 209           if ((g % k) == 0)
 210             {
 211               if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
 212                 {
 213                   ans = 1;
 214                   goto ret;
 215                 }
 216             }
 217         }
 218     }
 219   else
 220     {
 221       while ((k = gmp_nextprime (&ps)) < ub)
 222         {
 223           if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
 224             {
 225               ans = 1;
 226               goto ret;
 227             }
 228         }
 229     }
 230  ret:
 231   TMP_FREE;
 232   return ans;
 233 }
 234
 235 static const unsigned short nrtrial[] = { 100, 500, 1000 };
 236
 237 /* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
 238    number.  */
 239 static const double logs[] =
 240   { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
 241
 242 int
 243 mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
 244 {
 245   mp_limb_t *nc, factor, g;
 246   mp_limb_t exp, d;
 247   mp_bitcnt_t twos, count;
 248   int ans, where, neg, trial;
 249   TMP_DECL;
 250
 251   neg = n < 0;
 252   if (neg)
 253     {
 254       n = -n;
 255     }
 256
 257   if (n == 0 || (n == 1 && np[0] == 1)) /* Valgrind doesn't like
 258                                            (n <= (np[0] == 1)) */
 259     return 1;
 260
 261   TMP_MARK;
 262
 263   count = 0;
 264
 265   twos = mpn_scan1 (np, 0);
 266   if (twos != 0)
 267     {
 268       mp_size_t s;
 269       if (twos == 1)
 270         {
 271           return 0;
 272         }
 273       s = twos / GMP_LIMB_BITS;
 274       if (s + 1 == n && POW2_P (np[s]))
 275         {
 276           return ! (neg && POW2_P (twos));
 277         }
 278       count = twos % GMP_LIMB_BITS;
 279       n -= s;
 280       np += s;
 281       if (count > 0)
 282         {
 283           nc = TMP_ALLOC_LIMBS (n);
 284           mpn_rshift (nc, np, n, count);
 285           n -= (nc[n - 1] == 0);
 286           np = nc;
 287         }
 288     }
 289   g = twos;
 290
 291   trial = (n > SMALL) + (n > MEDIUM);
 292
 293   where = 0;
 294   factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
 295
 296   if (factor != 0)
 297     {
 298       if (count == 0) /* We did not allocate nc yet. */
 299         {
 300           nc = TMP_ALLOC_LIMBS (n);
 301         }
 302
 303       /* Remove factors found by trialdiv.  Optimization: If remove
 304          define _itch, we can allocate its scratch just once */
 305
 306       do
 307         {
 308           binvert_limb (d, factor);
 309
 310           /* After the first round we always have nc == np */
 311           exp = mpn_remove (nc, &n, np, n, &d, 1, ~(mp_bitcnt_t)0);
 312
 313           if (g == 0)
 314             g = exp;
 315           else
 316             g = mpn_gcd_1 (&g, 1, exp);
 317
 318           if (g == 1)
 319             {
 320               ans = 0;
 321               goto ret;
 322             }
 323
 324           if ((n == 1) & (nc[0] == 1))
 325             {
 326               ans = ! (neg && POW2_P (g));
 327               goto ret;
 328             }
 329
 330           np = nc;
 331           factor = mpn_trialdiv (np, n, nrtrial[trial], &where);
 332         }
 333       while (factor != 0);
 334     }
 335
 336   MPN_SIZEINBASE_2EXP(count, np, n, 1);   /* log (np) + 1 */
 337   d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
 338   ans = perfpow (np, n, d, g, count, neg);
 339
 340  ret:
 341   TMP_FREE;
 342   return ans;
 343 }