source/libs/gmp/gmp-src/mpn/generic/toom43_mul.c
/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
   times as large as bn.  Or more accurately, bn < an < 2 bn.

   Contributed to the GNU project by Marco Bodrato.

   The idea of applying Toom to unbalanced multiplication is due to Marco
   Bodrato and Alberto Zanoni.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: -2, -1, 0, +1, +2, +inf

   <-s-><--n--><--n--><--n-->
    ___ ______ ______ ______
   |a3_|___a2_|___a1_|___a0_|
         |_b2_|___b1_|___b0_|
         <-t--><--n--><--n-->

   v0   =  a0             *  b0            #   A(0)*B(0)
   v1   = (a0+ a1+ a2+ a3)*(b0+ b1+ b2)    #   A(1)*B(1)     ah  <= 3   bh  <= 2
   vm1  = (a0- a1+ a2- a3)*(b0- b1+ b2)    #  A(-1)*B(-1)   |ah| <= 1  |bh| <= 1
   v2   = (a0+2a1+4a2+8a3)*(b0+2b1+4b2)    #   A(2)*B(2)     ah  <= 14  bh  <= 6
   vm2  = (a0-2a1+4a2-8a3)*(b0-2b1+4b2)    #  A(-2)*B(-2)   |ah| <= 9  |bh| <= 4
   vinf =  a3             *  b2            # A(inf)*B(inf)
*/
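
/* A worked example of the evaluations above, treating each part as a small
   integer rather than an n-limb number (illustration only, not from the
   original source): with a3,a2,a1,a0 = 4,3,2,1 and b2,b1,b0 = 3,2,1,

     v1  = (1+2+3+4)*(1+2+3)    = 10 *  6 =  60 =  A(1)*B(1)
     vm1 = (1-2+3-4)*(1-2+3)    = -2 *  2 =  -4 = A(-1)*B(-1)
     v2  = (1+4+12+32)*(1+4+12) = 49 * 17 = 833 =  A(2)*B(2)

   The interpolation step at the end of this function recovers the product
   coefficients from six such values.  */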

void
mpn_toom43_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
{
  mp_size_t n, s, t;
  enum toom6_flags flags;
  mp_limb_t cy;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2 * n)
#define a3  (ap + 3 * n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2 * n)

  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);

  s = an - 3 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  /* This is true whenever an >= 25 or bn >= 19, I think.  It
     guarantees that we can fit 5 values of size n+1 in the product
     area.  */
  ASSERT (s + t >= 5);
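
  /* For example (not from the original source): an = 14, bn = 11 gives
     3*an = 42 < 4*bn = 44, so n = 1 + (11-1)/3 = 4, s = 14 - 3*4 = 2 and
     t = 11 - 2*4 = 3; both 0 < s,t <= n hold and s + t = 5 just meets
     the assertion above.  */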

#define v0    pp                          /* 2n */
#define vm1   (scratch)                   /* 2n+1 */
#define v1    (pp + 2 * n)                /* 2n+1 */
#define vm2   (scratch + 2 * n + 1)       /* 2n+1 */
#define v2    (scratch + 4 * n + 2)       /* 2n+1 */
#define vinf  (pp + 5 * n)                /* s+t */
#define bs1   pp                          /* n+1 */
#define bsm1  (scratch + 2 * n + 2)       /* n+1 */
#define asm1  (scratch + 3 * n + 3)       /* n+1 */
#define asm2  (scratch + 4 * n + 4)       /* n+1 */
#define bsm2  (pp + n + 1)                /* n+1 */
#define bs2   (pp + 2 * n + 2)            /* n+1 */
#define as2   (pp + 3 * n + 3)            /* n+1 */
#define as1   (pp + 4 * n + 4)            /* n+1 */

  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
     limb, because products will overwrite 2n+2 limbs. */
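
  /* Layout note (not in the original source): the temporaries above
     deliberately overlap the product areas; e.g. bsm1 and asm1 sit inside
     the region later used for vm2 and v2, and asm2 overlaps v2.  The five
     length-(n+1) multiplications below are therefore ordered so that each
     product is written only after the operands stored in its area have
     been consumed, and the extra scratch limb absorbs the 2n+2-limb
     writes.  */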

#define a0a2  scratch
#define b0b2  scratch
#define a1a3  asm1
#define b1d   bsm1

  /* Compute as2 and asm2. */
  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
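
  /* The helper evaluates the degree-3 polynomial A at +2 and -2, storing
     A(2) in as2 and |A(-2)| in asm2, and returns a mask that is nonzero
     exactly when A(-2) is negative; ANDing it with toom6_vm2_neg records
     that sign in flags.  (Explanatory comment, not in the original.)  */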

  /* Compute bs2 and bsm2. */
  b1d[n] = mpn_lshift (b1d, b1, n, 1);           /* 2b1 */
  cy  = mpn_lshift (b0b2, b2, t, 2);             /* 4b2 */
  cy += mpn_add_n (b0b2, b0b2, b0, t);           /* 4b2 + b0 */
  if (t != n)
    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
  b0b2[n] = cy;
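
  /* B(2) = (b0 + 4*b2) + 2*b1 and B(-2) = (b0 + 4*b2) - 2*b1, so bs2 and
     bsm2 are formed as the sum and absolute difference of b0b2 and b1d;
     when 2*b1 is the larger operand the subtraction is reversed and the
     sign flip is folded into flags.  (Explanatory comment, not in the
     original.)  */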
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
    {
      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
    }
  else
    {
      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
    }
#else
  mpn_add_n (bs2, b0b2, b1d, n+1);
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
    {
      mpn_sub_n (bsm2, b1d, b0b2, n+1);
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
    }
  else
    {
      mpn_sub_n (bsm2, b0b2, b1d, n+1);
    }
#endif

  /* Compute as1 and asm1. */
  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));

  /* Compute bs1 and bsm1. */
  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
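
  /* Similarly, B(1) = (b0 + b2) + b1 and B(-1) = (b0 + b2) - b1; bsm1
     currently holds b0 + b2, and the branches below add and subtract b1,
     again tracking the sign of B(-1) in flags.  (Explanatory comment,
     not in the original.)  */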
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
      bs1[n] = cy >> 1;
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
    }
  else
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
      bs1[n] = bsm1[n] + (cy >> 1);
      bsm1[n] -= cy & 1;
    }
#else
  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, bsm1, n);
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
    }
  else
    {
      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
    }
#endif

  ASSERT (as1[n] <= 3);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 14);
  ASSERT (bs2[n] <= 6);
  ASSERT (asm2[n] <= 9);
  ASSERT (bsm2[n] <= 4);
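
  /* These bounds follow from the evaluation coefficients (see the table at
     the top of the file): e.g. as2 = a0 + 2*a1 + 4*a2 + 8*a3 is less than
     (1+2+4+8) * 2^(n*GMP_NUMB_BITS), so its carry limb is at most 14,
     while |A(-1)| is at most max(a0+a2, a1+a3) < 2 * 2^(n*GMP_NUMB_BITS),
     giving a carry limb of at most 1.  (Explanatory comment, not in the
     original.)  */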

  /* vm1, 2n+1 limbs */
  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */

  /* vm2, 2n+1 limbs */
  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */

  /* v2, 2n+1 limbs */
  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */

  /* v1, 2n+1 limbs */
  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */

  /* vinf, s+t limbs */  /* W0 */
  if (s > t)
    mpn_mul (vinf, a3, s, b2, t);
  else
    mpn_mul (vinf, b2, t, a3, s);

  /* v0, 2n limbs */
  mpn_mul_n (v0, ap, bp, n);  /* W5 */

  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
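
  /* The six products W5..W0 are A*B evaluated at 0, -1, +1, -2, +2 and
     infinity.  v0, v1 and vinf already lie in the product area pp, so the
     interpolation routine is passed only the three scratch-resident values
     (vm1, vm2, v2) plus the signs in flags, and it recombines everything
     into the final 5n+s+t limb product in pp.  (Explanatory comment, not
     in the original.)  */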

#undef v0
#undef vm1
#undef v1
#undef vm2
#undef v2
#undef vinf
#undef bs1
#undef bs2
#undef bsm1
#undef bsm2
#undef asm1
#undef asm2
/* #undef as1 */
/* #undef as2 */
#undef a0a2
#undef b0b2
#undef a1a3
#undef b1d
#undef a0
#undef a1
#undef a2
#undef a3
#undef b0
#undef b1
#undef b2
}
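
/* Illustrative only (not part of the original file): a minimal harness in
   the style of GMP's internal tests, sketching how this routine might be
   driven directly.  It assumes the internal header gmp-impl.h is available
   and that mpn_toom43_mul_itch() reports the scratch size (6*n + 4,
   matching the comment above); real code should simply call mpn_mul() or
   mpz_mul(), which dispatch here when the operand sizes are in range.  */
#if 0
#include <stdlib.h>
#include "gmp.h"
#include "gmp-impl.h"

int
main (void)
{
  mp_size_t an = 40, bn = 30;  /* bn < an < 2*bn, large enough for toom43 */
  mp_ptr ap = malloc (an * sizeof (mp_limb_t));
  mp_ptr bp = malloc (bn * sizeof (mp_limb_t));
  mp_ptr pp = malloc ((an + bn) * sizeof (mp_limb_t));
  mp_ptr ref = malloc ((an + bn) * sizeof (mp_limb_t));
  mp_ptr scratch = malloc (mpn_toom43_mul_itch (an, bn) * sizeof (mp_limb_t));

  mpn_random (ap, an);
  mpn_random (bp, bn);

  mpn_toom43_mul (pp, ap, an, bp, bn, scratch);
  mpn_mul (ref, ap, an, bp, bn);  /* reference via the documented path */

  ASSERT_ALWAYS (mpn_cmp (pp, ref, an + bn) == 0);

  free (ap); free (bp); free (pp); free (ref); free (scratch);
  return 0;
}
#endif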