source/libs/gmp/gmp-src/mpn/generic/broot.c

   1 /* mpn_broot -- Compute hensel sqrt
   2
   3    Contributed to the GNU project by Niels Möller
   4
   5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
   8
   9 Copyright 2012 Free Software Foundation, Inc.
  10
  11 This file is part of the GNU MP Library.
  12
  13 The GNU MP Library is free software; you can redistribute it and/or modify
  14 it under the terms of either:
  15
  16   * the GNU Lesser General Public License as published by the Free
  17     Software Foundation; either version 3 of the License, or (at your
  18     option) any later version.
  19
  20 or
  21
  22   * the GNU General Public License as published by the Free Software
  23     Foundation; either version 2 of the License, or (at your option) any
  24     later version.
  25
  26 or both in parallel, as here.
  27
  28 The GNU MP Library is distributed in the hope that it will be useful, but
  29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  31 for more details.
  32
  33 You should have received copies of the GNU General Public License and the
  34 GNU Lesser General Public License along with the GNU MP Library.  If not,
  35 see https://www.gnu.org/licenses/.  */
  36
  37 #include "gmp.h"
  38 #include "gmp-impl.h"
  39
  40 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
  41    typical use will have e small. */
  42 static mp_limb_t
  43 powlimb (mp_limb_t a, mp_limb_t e)
  44 {
  45   mp_limb_t r = 1;
  46   mp_limb_t s = a;
  47
  48   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
  49     if (e & 1)
  50       r *= s;
  51
  52   return r;
  53 }
  54
  55 /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
  56
  57    Iterates
  58
  59      r' <-- r - r * (a^{k-1} r^k - 1) / n
  60
  61    If
  62
  63      a^{k-1} r^k = 1 (mod 2^m),
  64
  65    then
  66
  67      a^{k-1} r'^k = 1 (mod 2^{2m}),
  68
  69    Compute the update term as
  70
  71      r' = r - (a^{k-1} r^{k+1} - r) / k
  72
  73    where we still have cancellation of low limbs.
  74
  75  */
  76 void
  77 mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
  78 {
  79   mp_size_t sizes[GMP_LIMB_BITS * 2];
  80   mp_ptr akm1, tp, rnp, ep;
  81   mp_limb_t a0, r0, km1, kp1h, kinv;
  82   mp_size_t rn;
  83   unsigned i;
  84
  85   TMP_DECL;
  86
  87   ASSERT (n > 0);
  88   ASSERT (ap[0] & 1);
  89   ASSERT (k & 1);
  90   ASSERT (k >= 3);
  91
  92   TMP_MARK;
  93
  94   akm1 = TMP_ALLOC_LIMBS (4*n);
  95   tp = akm1 + n;
  96
  97   km1 = k-1;
  98   /* FIXME: Could arrange the iteration so we don't need to compute
  99      this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
 100      that we can use wraparound also for a*r, since the low half is
 101      unchanged from the previous iteration. Or possibly mulmid. Also,
 102      a r = a^{1/k}, so we get that value too, for free? */
 103   mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
 104
 105   a0 = ap[0];
 106   binvert_limb (kinv, k);
 107
 108   /* 4 bits: a^{1/k - 1} (mod 16):
 109
 110         a % 8
 111         1 3 5 7
 112    k%4 +-------
 113      1 |1 1 1 1
 114      3 |1 9 9 1
 115   */
 116   r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
 117   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
 118   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
 119   r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
 120 #if GMP_NUMB_BITS > 32
 121   {
 122     unsigned prec = 32;
 123     do
 124       {
 125         r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
 126         prec *= 2;
 127       }
 128     while (prec < GMP_NUMB_BITS);
 129   }
 130 #endif
 131
 132   rp[0] = r0;
 133   if (n == 1)
 134     {
 135       TMP_FREE;
 136       return;
 137     }
 138
 139   /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
 140   kp1h = k/2 + 1;
 141
 142   /* FIXME: Special case for two limb iteration. */
 143   rnp = TMP_ALLOC_LIMBS (2*n + 1);
 144   ep = rnp + n;
 145
 146   /* FIXME: Possible to this on the fly with some bit fiddling. */
 147   for (i = 0; n > 1; n = (n + 1)/2)
 148     sizes[i++] = n;
 149
 150   rn = 1;
 151
 152   while (i-- > 0)
 153     {
 154       /* Compute x^{k+1}. */
 155       mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
 156                                final iteration. */
 157       mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
 158
 159       /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
 160
 161       mpn_mullo_n (ep, rnp, akm1, sizes[i]);
 162       ASSERT (mpn_cmp (ep, rp, rn) == 0);
 163
 164       ASSERT (sizes[i] <= 2*rn);
 165       mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
 166       mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
 167       rn = sizes[i];
 168     }
 169   TMP_FREE;
 170 }
 171
 172 /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
 173 void
 174 mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
 175 {
 176   mp_ptr tp;
 177   TMP_DECL;
 178
 179   ASSERT (n > 0);
 180   ASSERT (ap[0] & 1);
 181   ASSERT (k & 1);
 182
 183   if (k == 1)
 184     {
 185       MPN_COPY (rp, ap, n);
 186       return;
 187     }
 188
 189   TMP_MARK;
 190   tp = TMP_ALLOC_LIMBS (n);
 191
 192   mpn_broot_invm1 (tp, ap, n, k);
 193   mpn_mullo_n (rp, tp, ap, n);
 194
 195   TMP_FREE;
 196 }