stdlib/mul_n.c

   1 /* mpn_mul_n -- Multiply two natural numbers of length n.
   2
   3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU Lesser General Public License as published by
   9 the Free Software Foundation; either version 2.1 of the License, or (at your
  10 option) any later version.
  11
  12 The GNU MP Library is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 License for more details.
  16
  17 You should have received a copy of the GNU Lesser General Public License
  18 along with the GNU MP Library; see the file COPYING.LIB.  If not, see
  19 <https://www.gnu.org/licenses/>.  */
  20
  21 #include <gmp.h>
  22 #include "gmp-impl.h"
  23
  24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  25    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  26    always stored.  Return the most significant limb.
  27
  28    Argument constraints:
  29    1. PRODP != UP and PRODP != VP, i.e. the destination
  30       must be distinct from the multiplier and the multiplicand.  */
  31
  32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  33    value which is good on most machines.  */
  34 #ifndef KARATSUBA_THRESHOLD
  35 #define KARATSUBA_THRESHOLD 32
  36 #endif
  37
  38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
  39 #if KARATSUBA_THRESHOLD < 2
  40 #undef KARATSUBA_THRESHOLD
  41 #define KARATSUBA_THRESHOLD 2
  42 #endif
  43
  44 /* Handle simple cases with traditional multiplication.
  45
  46    This is the most critical code of multiplication.  All multiplies rely
  47    on this, both small and huge.  Small ones arrive here immediately.  Huge
  48    ones arrive here as this is the base case for Karatsuba's recursive
  49    algorithm below.  */
  50
  51 void
  52 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  53 {
  54   mp_size_t i;
  55   mp_limb_t cy_limb;
  56   mp_limb_t v_limb;
  57
  58   /* Multiply by the first limb in V separately, as the result can be
  59      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  60   v_limb = vp[0];
  61   if (v_limb <= 1)
  62     {
  63       if (v_limb == 1)
  64         MPN_COPY (prodp, up, size);
  65       else
  66         MPN_ZERO (prodp, size);
  67       cy_limb = 0;
  68     }
  69   else
  70     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  71
  72   prodp[size] = cy_limb;
  73   prodp++;
  74
  75   /* For each iteration in the outer loop, multiply one limb from
  76      U with one limb from V, and add it to PROD.  */
  77   for (i = 1; i < size; i++)
  78     {
  79       v_limb = vp[i];
  80       if (v_limb <= 1)
  81         {
  82           cy_limb = 0;
  83           if (v_limb == 1)
  84             cy_limb = mpn_add_n (prodp, prodp, up, size);
  85         }
  86       else
  87         cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  88
  89       prodp[size] = cy_limb;
  90       prodp++;
  91     }
  92 }
  93
  94 void
  95 impn_mul_n (mp_ptr prodp,
  96              mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
  97 {
  98   if ((size & 1) != 0)
  99     {
 100       /* The size is odd, the code code below doesn't handle that.
 101          Multiply the least significant (size - 1) limbs with a recursive
 102          call, and handle the most significant limb of S1 and S2
 103          separately.  */
 104       /* A slightly faster way to do this would be to make the Karatsuba
 105          code below behave as if the size were even, and let it check for
 106          odd size in the end.  I.e., in essence move this code to the end.
 107          Doing so would save us a recursive call, and potentially make the
 108          stack grow a lot less.  */
 109
 110       mp_size_t esize = size - 1;       /* even size */
 111       mp_limb_t cy_limb;
 112
 113       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
 114       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
 115       prodp[esize + esize] = cy_limb;
 116       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
 117
 118       prodp[esize + size] = cy_limb;
 119     }
 120   else
 121     {
 122       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 123
 124          Split U in two pieces, U1 and U0, such that
 125          U = U0 + U1*(B**n),
 126          and V in V1 and V0, such that
 127          V = V0 + V1*(B**n).
 128
 129          UV is then computed recursively using the identity
 130
 131                 2n   n          n                     n
 132          UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 133                         1 1        1  0   0  1              0 0
 134
 135          Where B = 2**BITS_PER_MP_LIMB.  */
 136
 137       mp_size_t hsize = size >> 1;
 138       mp_limb_t cy;
 139       int negflg;
 140
 141       /*** Product H.    ________________  ________________
 142                         |_____U1 x V1____||____U0 x V0_____|  */
 143       /* Put result in upper part of PROD and pass low part of TSPACE
 144          as new TSPACE.  */
 145       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
 146
 147       /*** Product M.    ________________
 148                         |_(U1-U0)(V0-V1)_|  */
 149       if (mpn_cmp (up + hsize, up, hsize) >= 0)
 150         {
 151           mpn_sub_n (prodp, up + hsize, up, hsize);
 152           negflg = 0;
 153         }
 154       else
 155         {
 156           mpn_sub_n (prodp, up, up + hsize, hsize);
 157           negflg = 1;
 158         }
 159       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
 160         {
 161           mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
 162           negflg ^= 1;
 163         }
 164       else
 165         {
 166           mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
 167           /* No change of NEGFLG.  */
 168         }
 169       /* Read temporary operands from low part of PROD.
 170          Put result in low part of TSPACE using upper part of TSPACE
 171          as new TSPACE.  */
 172       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
 173
 174       /*** Add/copy product H.  */
 175       MPN_COPY (prodp + hsize, prodp + size, hsize);
 176       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
 177
 178       /*** Add product M (if NEGFLG M is a negative number).  */
 179       if (negflg)
 180         cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 181       else
 182         cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 183
 184       /*** Product L.    ________________  ________________
 185                         |________________||____U0 x V0_____|  */
 186       /* Read temporary operands from low part of PROD.
 187          Put result in low part of TSPACE using upper part of TSPACE
 188          as new TSPACE.  */
 189       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
 190
 191       /*** Add/copy Product L (twice).  */
 192
 193       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 194       if (cy)
 195         mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
 196
 197       MPN_COPY (prodp, tspace, hsize);
 198       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 199       if (cy)
 200         mpn_add_1 (prodp + size, prodp + size, size, 1);
 201     }
 202 }
 203
 204 void
 205 impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
 206 {
 207   mp_size_t i;
 208   mp_limb_t cy_limb;
 209   mp_limb_t v_limb;
 210
 211   /* Multiply by the first limb in V separately, as the result can be
 212      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 213   v_limb = up[0];
 214   if (v_limb <= 1)
 215     {
 216       if (v_limb == 1)
 217         MPN_COPY (prodp, up, size);
 218       else
 219         MPN_ZERO (prodp, size);
 220       cy_limb = 0;
 221     }
 222   else
 223     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
 224
 225   prodp[size] = cy_limb;
 226   prodp++;
 227
 228   /* For each iteration in the outer loop, multiply one limb from
 229      U with one limb from V, and add it to PROD.  */
 230   for (i = 1; i < size; i++)
 231     {
 232       v_limb = up[i];
 233       if (v_limb <= 1)
 234         {
 235           cy_limb = 0;
 236           if (v_limb == 1)
 237             cy_limb = mpn_add_n (prodp, prodp, up, size);
 238         }
 239       else
 240         cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
 241
 242       prodp[size] = cy_limb;
 243       prodp++;
 244     }
 245 }
 246
 247 void
 248 impn_sqr_n (mp_ptr prodp,
 249              mp_srcptr up, mp_size_t size, mp_ptr tspace)
 250 {
 251   if ((size & 1) != 0)
 252     {
 253       /* The size is odd, the code code below doesn't handle that.
 254          Multiply the least significant (size - 1) limbs with a recursive
 255          call, and handle the most significant limb of S1 and S2
 256          separately.  */
 257       /* A slightly faster way to do this would be to make the Karatsuba
 258          code below behave as if the size were even, and let it check for
 259          odd size in the end.  I.e., in essence move this code to the end.
 260          Doing so would save us a recursive call, and potentially make the
 261          stack grow a lot less.  */
 262
 263       mp_size_t esize = size - 1;       /* even size */
 264       mp_limb_t cy_limb;
 265
 266       MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
 267       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
 268       prodp[esize + esize] = cy_limb;
 269       cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
 270
 271       prodp[esize + size] = cy_limb;
 272     }
 273   else
 274     {
 275       mp_size_t hsize = size >> 1;
 276       mp_limb_t cy;
 277
 278       /*** Product H.    ________________  ________________
 279                         |_____U1 x U1____||____U0 x U0_____|  */
 280       /* Put result in upper part of PROD and pass low part of TSPACE
 281          as new TSPACE.  */
 282       MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
 283
 284       /*** Product M.    ________________
 285                         |_(U1-U0)(U0-U1)_|  */
 286       if (mpn_cmp (up + hsize, up, hsize) >= 0)
 287         {
 288           mpn_sub_n (prodp, up + hsize, up, hsize);
 289         }
 290       else
 291         {
 292           mpn_sub_n (prodp, up, up + hsize, hsize);
 293         }
 294
 295       /* Read temporary operands from low part of PROD.
 296          Put result in low part of TSPACE using upper part of TSPACE
 297          as new TSPACE.  */
 298       MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
 299
 300       /*** Add/copy product H.  */
 301       MPN_COPY (prodp + hsize, prodp + size, hsize);
 302       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
 303
 304       /*** Add product M (if NEGFLG M is a negative number).  */
 305       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 306
 307       /*** Product L.    ________________  ________________
 308                         |________________||____U0 x U0_____|  */
 309       /* Read temporary operands from low part of PROD.
 310          Put result in low part of TSPACE using upper part of TSPACE
 311          as new TSPACE.  */
 312       MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
 313
 314       /*** Add/copy Product L (twice).  */
 315
 316       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 317       if (cy)
 318         mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
 319
 320       MPN_COPY (prodp, tspace, hsize);
 321       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 322       if (cy)
 323         mpn_add_1 (prodp + size, prodp + size, size, 1);
 324     }
 325 }
 326
 327 /* This should be made into an inline function in gmp.h.  */
 328 void
 329 mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
 330 {
 331   TMP_DECL (marker);
 332   TMP_MARK (marker);
 333   if (up == vp)
 334     {
 335       if (size < KARATSUBA_THRESHOLD)
 336         {
 337           impn_sqr_n_basecase (prodp, up, size);
 338         }
 339       else
 340         {
 341           mp_ptr tspace;
 342           tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
 343           impn_sqr_n (prodp, up, size, tspace);
 344         }
 345     }
 346   else
 347     {
 348       if (size < KARATSUBA_THRESHOLD)
 349         {
 350           impn_mul_n_basecase (prodp, up, vp, size);
 351         }
 352       else
 353         {
 354           mp_ptr tspace;
 355           tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
 356           impn_mul_n (prodp, up, vp, size, tspace);
 357         }
 358     }
 359   TMP_FREE (marker);
 360 }