contrib/gmp/mpz/aorsmul_i.c

   1 /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
   2
   3    THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
   4    ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
   5    COMPLETELY IN FUTURE GNU MP RELEASES.
   6
   7 Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU Lesser General Public License as published by
  13 the Free Software Foundation; either version 3 of the License, or (at your
  14 option) any later version.
  15
  16 The GNU MP Library is distributed in the hope that it will be useful, but
  17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  19 License for more details.
  20
  21 You should have received a copy of the GNU Lesser General Public License
  22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  23
  24 #include "gmp.h"
  25 #include "gmp-impl.h"
  26
  27
  28 #if HAVE_NATIVE_mpn_mul_1c
  29 #define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  30   do {                                                  \
  31     (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
  32   } while (0)
  33 #else
  34 #define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  35   do {                                                  \
  36     mp_limb_t __cy;                                     \
  37     __cy = mpn_mul_1 (dst, src, size, n);               \
  38     (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
  39   } while (0)
  40 #endif
  41
  42
  43 /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
  44
  45    All that's needed to account for negative w or x is to flip "sub".
  46
  47    The final w will retain its sign, unless an underflow occurs in a submul
  48    of absolute values, in which case it's flipped.
  49
  50    If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
  51    used.  The alternative would be mpn_mul_1 into temporary space followed
  52    by mpn_sub_n.  Avoiding temporary space seem good, and submul+com stands
  53    a chance of being faster since it involves only one set of carry
  54    propagations, not two.  Note that doing an addmul_1 with a
  55    twos-complement negative y doesn't work, because it effectively adds an
  56    extra x * 2^GMP_LIMB_BITS.  */
  57
  58 REGPARM_ATTR(1) void
  59 mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
  60 {
  61   mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
  62   mp_srcptr  xp;
  63   mp_ptr     wp;
  64   mp_limb_t  cy;
  65
  66   /* w unaffected if x==0 or y==0 */
  67   xsize = SIZ (x);
  68   if (xsize == 0 || y == 0)
  69     return;
  70
  71   sub ^= xsize;
  72   xsize = ABS (xsize);
  73
  74   wsize_signed = SIZ (w);
  75   if (wsize_signed == 0)
  76     {
  77       /* nothing to add to, just set x*y, "sub" gives the sign */
  78       MPZ_REALLOC (w, xsize+1);
  79       wp = PTR (w);
  80       cy = mpn_mul_1 (wp, PTR(x), xsize, y);
  81       wp[xsize] = cy;
  82       xsize += (cy != 0);
  83       SIZ (w) = (sub >= 0 ? xsize : -xsize);
  84       return;
  85     }
  86
  87   sub ^= wsize_signed;
  88   wsize = ABS (wsize_signed);
  89
  90   new_wsize = MAX (wsize, xsize);
  91   MPZ_REALLOC (w, new_wsize+1);
  92   wp = PTR (w);
  93   xp = PTR (x);
  94   min_size = MIN (wsize, xsize);
  95
  96   if (sub >= 0)
  97     {
  98       /* addmul of absolute values */
  99
 100       cy = mpn_addmul_1 (wp, xp, min_size, y);
 101       wp += min_size;
 102       xp += min_size;
 103
 104       dsize = xsize - wsize;
 105 #if HAVE_NATIVE_mpn_mul_1c
 106       if (dsize > 0)
 107         cy = mpn_mul_1c (wp, xp, dsize, y, cy);
 108       else if (dsize < 0)
 109         {
 110           dsize = -dsize;
 111           cy = mpn_add_1 (wp, wp, dsize, cy);
 112         }
 113 #else
 114       if (dsize != 0)
 115         {
 116           mp_limb_t  cy2;
 117           if (dsize > 0)
 118             cy2 = mpn_mul_1 (wp, xp, dsize, y);
 119           else
 120             {
 121               dsize = -dsize;
 122               cy2 = 0;
 123             }
 124           cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
 125         }
 126 #endif
 127
 128       wp[dsize] = cy;
 129       new_wsize += (cy != 0);
 130     }
 131   else
 132     {
 133       /* submul of absolute values */
 134
 135       cy = mpn_submul_1 (wp, xp, min_size, y);
 136       if (wsize >= xsize)
 137         {
 138           /* if w bigger than x, then propagate borrow through it */
 139           if (wsize != xsize)
 140             cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
 141
 142           if (cy != 0)
 143             {
 144               /* Borrow out of w, take twos complement negative to get
 145                  absolute value, flip sign of w.  */
 146               wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
 147               mpn_com (wp, wp, new_wsize);
 148               new_wsize++;
 149               MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
 150               wsize_signed = -wsize_signed;
 151             }
 152         }
 153       else /* wsize < xsize */
 154         {
 155           /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
 156              take twos complement and use an mpn_mul_1 for the rest.  */
 157
 158           mp_limb_t  cy2;
 159
 160           /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
 161           mpn_com (wp, wp, wsize);
 162           cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
 163           cy -= 1;
 164
 165           /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never
 166              returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
 167           cy2 = (cy == MP_LIMB_T_MAX);
 168           cy += cy2;
 169           MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
 170           wp[new_wsize] = cy;
 171           new_wsize += (cy != 0);
 172
 173           /* Apply any -1 from above.  The value at wp+wsize is non-zero
 174              because y!=0 and the high limb of x will be non-zero.  */
 175           if (cy2)
 176             MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
 177
 178           wsize_signed = -wsize_signed;
 179         }
 180
 181       /* submul can produce high zero limbs due to cancellation, both when w
 182          has more limbs or x has more  */
 183       MPN_NORMALIZE (wp, new_wsize);
 184     }
 185
 186   SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
 187
 188   ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
 189 }
 190
 191
 192 void
 193 mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
 194 {
 195 #if BITS_PER_ULONG > GMP_NUMB_BITS
 196   if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
 197     {
 198       mpz_t t;
 199       mp_ptr tp;
 200       mp_size_t xn;
 201       TMP_DECL;
 202       TMP_MARK;
 203       xn = SIZ (x);
 204       MPZ_TMP_INIT (t, ABS (xn) + 1);
 205       tp = PTR (t);
 206       tp[0] = 0;
 207       MPN_COPY (tp + 1, PTR(x), ABS (xn));
 208       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
 209       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
 210       PTR(t) = tp + 1;
 211       SIZ(t) = xn;
 212       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
 213       TMP_FREE;
 214       return;
 215     }
 216 #endif
 217   mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
 218 }
 219
 220 void
 221 mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
 222 {
 223 #if BITS_PER_ULONG > GMP_NUMB_BITS
 224   if (y > GMP_NUMB_MAX && SIZ(x) != 0)
 225     {
 226       mpz_t t;
 227       mp_ptr tp;
 228       mp_size_t xn;
 229       TMP_DECL;
 230       TMP_MARK;
 231       xn = SIZ (x);
 232       MPZ_TMP_INIT (t, ABS (xn) + 1);
 233       tp = PTR (t);
 234       tp[0] = 0;
 235       MPN_COPY (tp + 1, PTR(x), ABS (xn));
 236       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
 237       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
 238       PTR(t) = tp + 1;
 239       SIZ(t) = xn;
 240       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
 241       TMP_FREE;
 242       return;
 243     }
 244 #endif
 245   mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
 246 }