/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
   accurately, bn <= an < 2bn.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2010, 2012, 2014 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp.h"
#include "gmp-impl.h"
/* Evaluate in: -1, 0, +inf

  <-s--><--n-->
   ____ ______
  |_a1_|___a0_|
   |b1_|___b0_|
   <-t-><--n-->

  v0  =  a0     * b0       #   A(0)*B(0)
  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
  vinf=      a1 *     b1   # A(inf)*B(inf)
*/
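/* Recombination sketch: with A = a1*x + a0, B = b1*x + b0 and x = beta^n
   (beta = 2^GMP_NUMB_BITS), the middle coefficient a1*b0 + a0*b1 equals
   v0 + vinf - vm1, so

     A*B = vinf*x^2 + (v0 + vinf - vm1)*x + v0.

   Single-digit example: a0=7, a1=2, b0=5, b1=3 gives v0=35, vinf=6,
   vm1=(7-2)*(5-3)=10, middle term 35+6-10 = 31 = 2*5 + 7*3.  The code below
   stores |a0-a1|*|b0-b1| and keeps the sign in vm1_neg, so vm1 is added
   instead of subtracted when the two differences have opposite signs.  */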
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_toom22 1
#else
#define MAYBE_mul_toom22					\
  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
#endif
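/* Rationale (assumed from the threshold scheme): callers normally reach
   mpn_toom22_mul only for operand sizes below MUL_TOOM33_THRESHOLD, and the
   recursive products below work on roughly half that size.  When
   MUL_TOOM33_THRESHOLD < 2 * MUL_TOOM22_THRESHOLD those halves always fall
   below MUL_TOOM22_THRESHOLD, so MAYBE_mul_toom22 == 0 lets the compiler
   discard the recursive toom22/toom32 branches in the macros below.  */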
#define TOOM22_MUL_N_REC(p, a, b, n, ws)			\
  do {								\
    if (! MAYBE_mul_toom22					\
	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))		\
      mpn_mul_basecase (p, a, n, b, n);				\
    else							\
      mpn_toom22_mul (p, a, n, b, n, ws);			\
  } while (0)
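/* TOOM22_MUL_N_REC handles the balanced recursive products (v0, vm1, and
   vinf when s == t), dropping to the basecase once n is below
   MUL_TOOM22_THRESHOLD.  */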
/* Normally, this calls mul_basecase or toom22_mul.  But when the fraction
   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
   relative unbalance will become a larger and larger relative unbalance with
   each recursion (the difference s-t will be invariant over recursive calls).
   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
#define TOOM22_MUL_REC(p, a, an, b, bn, ws)			\
  do {								\
    if (! MAYBE_mul_toom22					\
	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))		\
      mpn_mul_basecase (p, a, an, b, bn);			\
    else if (4 * an < 5 * bn)					\
      mpn_toom22_mul (p, a, an, b, bn, ws);			\
    else							\
      mpn_toom32_mul (p, a, an, b, bn, ws);			\
  } while (0)
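/* The 4 * an < 5 * bn test keeps toom22 while the operands are still nearly
   balanced (an/bn < 5/4) and switches to mpn_toom32_mul, which is designed
   for ratios around 3:2, once the unbalance grows beyond that.  */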
void
mpn_toom22_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, cy2;
  mp_ptr asm1;
  mp_ptr bsm1;

#define a0  ap
#define a1  (ap + n)
#define b0  bp
#define b1  (bp + n)
  s = an >> 1;
  n = an - s;
  t = bn - n;

  ASSERT (an >= bn);

  ASSERT (0 < s && s <= n && s >= n - 1);
  ASSERT (0 < t && t <= s);
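  /* Example split: an = 7, bn = 5 gives s = 3, n = 4, t = 1, i.e. a1 is the
     high 3 limbs of {ap,7}, a0 the low 4 limbs, b1 the high limb of {bp,5}
     and b0 its low 4 limbs, consistent with the asserts above.  */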
  asm1 = pp;
  bsm1 = pp + n;

  vm1_neg = 0;
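  /* asm1 and bsm1 are built in the low 2n limbs of pp; that area is not
     needed for v0 until after the vm1 product has consumed them.  */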
  /* Compute asm1.  */
  if (s == n)
    {
      if (mpn_cmp (a0, a1, n) < 0)
	{
	  mpn_sub_n (asm1, a1, a0, n);
	  vm1_neg = 1;
	}
      else
	{
	  mpn_sub_n (asm1, a0, a1, n);
	}
    }
  else /* n - s == 1 */
    {
      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
	{
	  mpn_sub_n (asm1, a1, a0, s);
	  asm1[s] = 0;
	  vm1_neg = 1;
	}
      else
	{
	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
	}
    }
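  /* Here asm1 holds |a0 - a1| as an n-limb number (a1 implicitly zero-padded
     when s == n - 1), and vm1_neg is 1 exactly when a0 - a1 is negative.  */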
  /* Compute bsm1.  */
  if (t == n)
    {
      if (mpn_cmp (b0, b1, n) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, n);
	  vm1_neg ^= 1;
	}
      else
	{
	  mpn_sub_n (bsm1, b0, b1, n);
	}
    }
  else
    {
      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, t);
	  MPN_ZERO (bsm1 + t, n - t);
	  vm1_neg ^= 1;
	}
      else
	{
	  mpn_sub (bsm1, b0, n, b1, t);
	}
    }
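  /* Likewise bsm1 = |b0 - b1| in n limbs (b1 zero-padded when t < n), and
     vm1_neg now tells whether (a0 - a1)*(b0 - b1) is negative, i.e. whether
     vm1 must be added rather than subtracted during recombination.  */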
#define v0    pp				/* 2n */
#define vinf  (pp + 2 * n)			/* s+t */
#define vm1   scratch				/* 2n */
#define scratch_out  scratch + 2 * n
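  /* Memory layout from here on: pp = [v0: 2n limbs][vinf: s+t limbs], and
     scratch = [vm1: 2n limbs][scratch_out: workspace for the recursion].  */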
  /* vm1, 2n limbs */
  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);

  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);

  /* v0, 2n limbs */
  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
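  /* Ordering matters: vm1 must be formed before v0, since writing v0
     overwrites the asm1/bsm1 area at pp.  vinf (at pp + 2n) reads only from
     a1 and b1 and is independent of that area.  */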
  /* H(v0) + L(vinf) */
  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);

  /* L(v0) + H(v0) */
  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);

  /* L(vinf) + H(vinf) */
  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
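  /* At this point pp holds v0 + (v0 + vinf)*beta^n + vinf*beta^(2n), except
     for the carries still recorded in cy2 (to be applied at position 2n) and
     cy (at position 3n).  Applying -vm1*beta^n (or +vm1*beta^n when vm1_neg)
     below completes the product.  */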
  if (vm1_neg)
    cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
  else
    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);

  ASSERT (cy + 1 <= 3);
  ASSERT (cy2 <= 2);
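  /* cy is unsigned, so a borrow out of the vm1 subtraction shows up as
     cy == (mp_limb_t) -1; the ASSERT (cy + 1 <= 3) allows for that case, and
     the final else branch below clears it by decrementing at position 3n.  */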
  MPN_INCR_U (pp + 2 * n, s + t, cy2);
  if (LIKELY (cy <= 2))
    /* if s+t==n, cy is zero, but we should not access pp[3*n] at all. */
    MPN_INCR_U (pp + 3 * n, s + t - n, cy);
  else
    MPN_DECR_U (pp + 3 * n, s + t - n, 1);
}