source/libs/gmp/gmp-src/mpn/generic/toom42_mul.c

   1 /* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
   2    as large as bn.  Or more accurately, (3/2)bn < an < 4bn.
   3
   4    Contributed to the GNU project by Torbjorn Granlund.
   5    Additional improvements by Marco Bodrato.
   6
   7    The idea of applying toom to unbalanced multiplication is due to Marco
   8    Bodrato and Alberto Zanoni.
   9
  10    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
  11    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
  12    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  13
  14 Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.
  15
  16 This file is part of the GNU MP Library.
  17
  18 The GNU MP Library is free software; you can redistribute it and/or modify
  19 it under the terms of either:
  20
  21   * the GNU Lesser General Public License as published by the Free
  22     Software Foundation; either version 3 of the License, or (at your
  23     option) any later version.
  24
  25 or
  26
  27   * the GNU General Public License as published by the Free Software
  28     Foundation; either version 2 of the License, or (at your option) any
  29     later version.
  30
  31 or both in parallel, as here.
  32
  33 The GNU MP Library is distributed in the hope that it will be useful, but
  34 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  35 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  36 for more details.
  37
  38 You should have received copies of the GNU General Public License and the
  39 GNU Lesser General Public License along with the GNU MP Library.  If not,
  40 see https://www.gnu.org/licenses/.  */
  41
  42
  43 #include "gmp.h"
  44 #include "gmp-impl.h"
  45
  46 /* Evaluate in: -1, 0, +1, +2, +inf
  47
  48   <-s-><--n--><--n--><--n-->
  49    ___ ______ ______ ______
  50   |a3_|___a2_|___a1_|___a0_|
  51                |_b1_|___b0_|
  52                <-t--><--n-->
  53
  54   v0  =  a0             * b0      #   A(0)*B(0)
  55   v1  = (a0+ a1+ a2+ a3)*(b0+ b1) #   A(1)*B(1)      ah  <= 3  bh <= 1
  56   vm1 = (a0- a1+ a2- a3)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh  = 0
  57   v2  = (a0+2a1+4a2+8a3)*(b0+2b1) #   A(2)*B(2)      ah  <= 14 bh <= 2
  58   vinf=              a3 *     b1  # A(inf)*B(inf)
  59 */
  60
  61 #define TOOM42_MUL_N_REC(p, a, b, n, ws)                                \
  62   do {                                                                  \
  63     mpn_mul_n (p, a, b, n);                                             \
  64   } while (0)
  65
  66 void
  67 mpn_toom42_mul (mp_ptr pp,
  68                 mp_srcptr ap, mp_size_t an,
  69                 mp_srcptr bp, mp_size_t bn,
  70                 mp_ptr scratch)
  71 {
  72   mp_size_t n, s, t;
  73   int vm1_neg;
  74   mp_limb_t cy, vinf0;
  75   mp_ptr a0_a2;
  76   mp_ptr as1, asm1, as2;
  77   mp_ptr bs1, bsm1, bs2;
  78   mp_ptr tmp;
  79   TMP_DECL;
  80
  81 #define a0  ap
  82 #define a1  (ap + n)
  83 #define a2  (ap + 2*n)
  84 #define a3  (ap + 3*n)
  85 #define b0  bp
  86 #define b1  (bp + n)
  87
  88   n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
  89
  90   s = an - 3 * n;
  91   t = bn - n;
  92
  93   ASSERT (0 < s && s <= n);
  94   ASSERT (0 < t && t <= n);
  95
  96   TMP_MARK;
  97
  98   tmp = TMP_ALLOC_LIMBS (6 * n + 5);
  99   as1  = tmp; tmp += n + 1;
 100   asm1 = tmp; tmp += n + 1;
 101   as2  = tmp; tmp += n + 1;
 102   bs1  = tmp; tmp += n + 1;
 103   bsm1 = tmp; tmp += n;
 104   bs2  = tmp; tmp += n + 1;
 105
 106   a0_a2 = pp;
 107
 108   /* Compute as1 and asm1.  */
 109   vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
 110
 111   /* Compute as2.  */
 112 #if HAVE_NATIVE_mpn_addlsh1_n
 113   cy  = mpn_addlsh1_n (as2, a2, a3, s);
 114   if (s != n)
 115     cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
 116   cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
 117   cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
 118 #else
 119   cy  = mpn_lshift (as2, a3, s, 1);
 120   cy += mpn_add_n (as2, a2, as2, s);
 121   if (s != n)
 122     cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
 123   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
 124   cy += mpn_add_n (as2, a1, as2, n);
 125   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
 126   cy += mpn_add_n (as2, a0, as2, n);
 127 #endif
 128   as2[n] = cy;
 129
 130   /* Compute bs1 and bsm1.  */
 131   if (t == n)
 132     {
 133 #if HAVE_NATIVE_mpn_add_n_sub_n
 134       if (mpn_cmp (b0, b1, n) < 0)
 135         {
 136           cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
 137           vm1_neg ^= 1;
 138         }
 139       else
 140         {
 141           cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
 142         }
 143       bs1[n] = cy >> 1;
 144 #else
 145       bs1[n] = mpn_add_n (bs1, b0, b1, n);
 146
 147       if (mpn_cmp (b0, b1, n) < 0)
 148         {
 149           mpn_sub_n (bsm1, b1, b0, n);
 150           vm1_neg ^= 1;
 151         }
 152       else
 153         {
 154           mpn_sub_n (bsm1, b0, b1, n);
 155         }
 156 #endif
 157     }
 158   else
 159     {
 160       bs1[n] = mpn_add (bs1, b0, n, b1, t);
 161
 162       if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
 163         {
 164           mpn_sub_n (bsm1, b1, b0, t);
 165           MPN_ZERO (bsm1 + t, n - t);
 166           vm1_neg ^= 1;
 167         }
 168       else
 169         {
 170           mpn_sub (bsm1, b0, n, b1, t);
 171         }
 172     }
 173
 174   /* Compute bs2, recycling bs1. bs2=bs1+b1  */
 175   mpn_add (bs2, bs1, n + 1, b1, t);
 176
 177   ASSERT (as1[n] <= 3);
 178   ASSERT (bs1[n] <= 1);
 179   ASSERT (asm1[n] <= 1);
 180 /*ASSERT (bsm1[n] == 0);*/
 181   ASSERT (as2[n] <= 14);
 182   ASSERT (bs2[n] <= 2);
 183
 184 #define v0    pp                                /* 2n */
 185 #define v1    (pp + 2 * n)                      /* 2n+1 */
 186 #define vinf  (pp + 4 * n)                      /* s+t */
 187 #define vm1   scratch                           /* 2n+1 */
 188 #define v2    (scratch + 2 * n + 1)             /* 2n+2 */
 189 #define scratch_out     scratch + 4 * n + 4     /* Currently unused. */
 190
 191   /* vm1, 2n+1 limbs */
 192   TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
 193   cy = 0;
 194   if (asm1[n] != 0)
 195     cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
 196   vm1[2 * n] = cy;
 197
 198   TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);  /* v2, 2n+1 limbs */
 199
 200   /* vinf, s+t limbs */
 201   if (s > t)  mpn_mul (vinf, a3, s, b1, t);
 202   else        mpn_mul (vinf, b1, t, a3, s);
 203
 204   vinf0 = vinf[0];                              /* v1 overlaps with this */
 205
 206   /* v1, 2n+1 limbs */
 207   TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
 208   if (as1[n] == 1)
 209     {
 210       cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
 211     }
 212   else if (as1[n] == 2)
 213     {
 214 #if HAVE_NATIVE_mpn_addlsh1_n
 215       cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
 216 #else
 217       cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
 218 #endif
 219     }
 220   else if (as1[n] == 3)
 221     {
 222       cy = 3 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
 223     }
 224   else
 225     cy = 0;
 226   if (bs1[n] != 0)
 227     cy += mpn_add_n (v1 + n, v1 + n, as1, n);
 228   v1[2 * n] = cy;
 229
 230   TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);        /* v0, 2n limbs */
 231
 232   mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
 233
 234   TMP_FREE;
 235 }