source/libs/gmp/gmp-src/mpn/generic/toom_interpolate_5pts.c

   1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
   2
   3    Contributed to the GNU project by Robert Harley.
   4    Improvements by Paul Zimmermann and Marco Bodrato.
   5
   6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   9
  10 Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of either:
  16
  17   * the GNU Lesser General Public License as published by the Free
  18     Software Foundation; either version 3 of the License, or (at your
  19     option) any later version.
  20
  21 or
  22
  23   * the GNU General Public License as published by the Free Software
  24     Foundation; either version 2 of the License, or (at your option) any
  25     later version.
  26
  27 or both in parallel, as here.
  28
  29 The GNU MP Library is distributed in the hope that it will be useful, but
  30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  32 for more details.
  33
  34 You should have received copies of the GNU General Public License and the
  35 GNU Lesser General Public License along with the GNU MP Library.  If not,
  36 see https://www.gnu.org/licenses/.  */
  37
  38 #include "gmp.h"
  39 #include "gmp-impl.h"
  40
  41 void
  42 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
  43                            mp_size_t k, mp_size_t twor, int sa,
  44                            mp_limb_t vinf0)
  45 {
  46   mp_limb_t cy, saved;
  47   mp_size_t twok;
  48   mp_size_t kk1;
  49   mp_ptr c1, v1, c3, vinf;
  50
  51   twok = k + k;
  52   kk1 = twok + 1;
  53
  54   c1 = c  + k;
  55   v1 = c1 + k;
  56   c3 = v1 + k;
  57   vinf = c3 + k;
  58
  59 #define v0 (c)
  60   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
  61      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
  62   */
  63   if (sa)
  64     ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
  65   else
  66     ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
  67
  68   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
  69        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
  70
  71   ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
  72                                                       /* (5 3 1 1 0)*/
  73
  74   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
  75        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
  76
  77   /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
  78      tm1 >= 0                                         (0  1 0  1 0)
  79      No carry comes out from {v1, kk1} +/- {vm1, kk1},
  80      and the division by two is exact.
  81      If (sa!=0) the sign of vm1 is negative */
  82   if (sa)
  83     {
  84 #ifdef HAVE_NATIVE_mpn_rsh1add_n
  85       mpn_rsh1add_n (vm1, v1, vm1, kk1);
  86 #else
  87       ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
  88       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
  89 #endif
  90     }
  91   else
  92     {
  93 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
  94       mpn_rsh1sub_n (vm1, v1, vm1, kk1);
  95 #else
  96       ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
  97       ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
  98 #endif
  99     }
 100
 101   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
 102        v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
 103
 104   /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
 105      t1 >= 0
 106   */
 107   vinf[0] -= mpn_sub_n (v1, v1, c, twok);
 108
 109   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
 110        v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
 111
 112   /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
 113      t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
 114   */
 115 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
 116   mpn_rsh1sub_n (v2, v2, v1, kk1);
 117 #else
 118   ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
 119   ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
 120 #endif
 121
 122   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
 123        v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
 124
 125   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
 126      result is v1 >= 0
 127   */
 128   ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
 129
 130   /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
 131   cy = mpn_add_n (c1, c1, vm1, kk1);
 132   MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
 133   /* Memory allocated for vm1 is now free, it can be recycled ...*/
 134
 135   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
 136      result is v2 >= 0 */
 137   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
 138   vinf[0] = vinf0;       /* Set the right value for vinf0                     */
 139 #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
 140   cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
 141 #else
 142   /* Overwrite unused vm1 */
 143   cy = mpn_lshift (vm1, vinf, twor, 1);
 144   cy += mpn_sub_n (v2, v2, vm1, twor);
 145 #endif
 146   MPN_DECR_U (v2 + twor, kk1 - twor, cy);
 147
 148   /* Current matrix is
 149      [1 0 0 0 0; vinf
 150       0 1 0 0 0; v2
 151       1 0 1 0 0; v1
 152       0 1 0 1 0; vm1
 153       0 0 0 0 1] v0
 154      Some values already are in-place (we added vm1 in the correct position)
 155      | vinf|  v1 |  v0 |
 156               | vm1 |
 157      One still is in a separated area
 158         | +v2 |
 159      We have to compute v1-=vinf; vm1 -= v2,
 160            |-vinf|
 161               | -v2 |
 162      Carefully reordering operations we can avoid to compute twice the sum
 163      of the high half of v2 plus the low half of vinf.
 164   */
 165
 166   /* Add the high half of t2 in {vinf} */
 167   if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
 168     cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
 169     MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
 170   } else { /* triggered only by very unbalanced cases like
 171               (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
 172     ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
 173   }
 174   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
 175      result is >= 0 */
 176   /* Side effect: we also subtracted (high half) vm1 -= v2 */
 177   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
 178   vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
 179   vinf[0] = saved;
 180   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
 181
 182   /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
 183      Operate only on the low half.
 184   */
 185   cy = mpn_sub_n (c1, c1, v2, k);
 186   MPN_DECR_U (v1, kk1, cy);
 187
 188   /********************* Beginning the final phase **********************/
 189
 190   /* Most of the recomposition was done */
 191
 192   /* add t2 in {c+3k, ...}, but only the low half */
 193   cy = mpn_add_n (c3, c3, v2, k);
 194   vinf[0] += cy;
 195   ASSERT(vinf[0] >= cy); /* No carry */
 196   MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
 197
 198 #undef v0
 199 }