/* Source viewer metadata (luatex.git vendored copy):
   beta-0.89.2
   [luatex.git] / source / libs / gmp / gmp-src / mpn / generic / toom_interpolate_16pts.c
   blob 5afe6641f67365ebd076cd9fd1bf57ed73ae72e5 */
/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
38 #include "gmp.h"
39 #include "gmp-impl.h"
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
43 #endif
45 #if GMP_NUMB_BITS < 28
46 #error Not implemented: divexact_by188513325 and _by182712915 will not work.
47 #endif
50 #if HAVE_NATIVE_mpn_sublsh_n
51 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
52 #else
53 static mp_limb_t
54 DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
56 #if USE_MUL_1 && 0
57 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
58 #else
59 mp_limb_t __cy;
60 __cy = mpn_lshift(ws,src,n,s);
61 return __cy + mpn_sub_n(dst,dst,ws,n);
62 #endif
64 #endif
66 #if HAVE_NATIVE_mpn_addlsh_n
67 #define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
68 #else
69 static mp_limb_t
70 DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
72 #if USE_MUL_1 && 0
73 return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
74 #else
75 mp_limb_t __cy;
76 __cy = mpn_lshift(ws,src,n,s);
77 return __cy + mpn_add_n(dst,dst,ws,n);
78 #endif
80 #endif
#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
#else
/* {dst,nd} -= {src,ns} >> s.  Fallback built from sublsh of the upper part
   plus a separate decrement of the low fractional limb.
   FIXME: This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
do {									\
  mp_limb_t __cy;							\
  MPN_DECR_U (dst, nd, src[0] >> s);					\
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
} while (0)
#endif
/* FIXME: tuneup should decide the best variant */
#ifndef AORSMUL_FASTER_AORS_AORSLSH
#define AORSMUL_FASTER_AORS_AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_AORS_2AORSLSH
#define AORSMUL_FASTER_AORS_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_2AORSLSH
#define AORSMUL_FASTER_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_3AORSLSH
#define AORSMUL_FASTER_3AORSLSH 1
#endif

/* With limbs narrower than 43 bits the 42-bit shifts below must be split:
   shift one whole limb by pointer offset, then by the remaining bits.  */
#if GMP_NUMB_BITS < 43
#define BIT_CORRECTION 1
#define CORRECTION_BITS GMP_NUMB_BITS
#else
#define BIT_CORRECTION 0
#define CORRECTION_BITS 0
#endif
/* Modular (binary) inverses of the odd divisors, used by the exact-division
   (bdiv) paths below.  */
#define BINVERT_9 \
  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)

#define BINVERT_255 \
  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))

/* FIXME: find some more general expressions for inverses */
#if GMP_LIMB_BITS == 32
#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
#if GMP_NAIL_BITS == 0
#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
#else /* GMP_NAIL_BITS != 0 */
#define BINVERT_255x182712915H \
  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
#define BINVERT_255x188513325H \
  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
#endif
#else
#if GMP_LIMB_BITS == 64
#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
#define BINVERT_255x182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
#define BINVERT_255x188513325 (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
#endif
#endif
/* Exact divisions by the small fixed constants of the interpolation.
   Each picks the fastest available primitive: bdiv_dbm1 (divisor of the
   form B-1 over a digit), pi1_bdiv_q_1 (precomputed inverse, shifted
   divisor), or plain divexact_1.  */
#ifndef mpn_divexact_by255
#if GMP_NUMB_BITS % 8 == 0
#define mpn_divexact_by255(dst,src,size) \
  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
#else
#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
#endif
#endif
#endif

#ifndef mpn_divexact_by255x4
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
#else
#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
#endif
#endif

#ifndef mpn_divexact_by9x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
#else
#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
#endif
#endif

#ifndef mpn_divexact_by42525x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
#else
#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
#endif
#endif

#ifndef mpn_divexact_by2835x64
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
#else
#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
#endif
#endif
/* Exact divisions by 255*182712915 and 255*188513325.  When the product
   does not fit a limb (GMP_NUMB_BITS < 36), divide by the two factors in
   sequence; otherwise use the combined divisor and its precomputed
   binary inverse.  */
#ifndef mpn_divexact_by255x182712915
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
/* FIXME: use mpn_bdiv_q_2_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
#define mpn_divexact_by255x182712915(dst,src,size)				\
  do {										\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);	\
    mpn_divexact_by255(dst,dst,size);						\
  } while(0)
#else
#define mpn_divexact_by255x182712915(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
#define mpn_divexact_by255x182712915(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
#else
#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

#ifndef mpn_divexact_by255x188513325
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
/* FIXME: use mpn_bdiv_q_1_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
#define mpn_divexact_by255x188513325(dst,src,size)				\
  do {										\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);	\
    mpn_divexact_by255(dst,dst,size);						\
  } while(0)
#else
#define mpn_divexact_by255x188513325(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
#define mpn_divexact_by255x188513325(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
#else
#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif
251 /* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
252 points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
253 +-1/8, 0. More precisely, we want to compute
254 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
255 14), given the 16 (rsp. 15) values:
257 r0 = limit at infinity of f(x) / x^7,
258 r1 = f(8),f(-8),
259 r2 = f(4),f(-4),
260 r3 = f(2),f(-2),
261 r4 = f(1),f(-1),
262 r5 = f(1/4),f(-1/4),
263 r6 = f(1/2),f(-1/2),
264 r7 = f(1/8),f(-1/8),
265 r8 = f(0).
267 All couples of the form f(n),f(-n) must be already mixed with
268 toom_couple_handling(f(n),...,f(-n),...)
270 The result is stored in {pp, spt + 7*n (or 8*n)}.
271 At entry, r8 is stored at {pp, 2n},
272 r6 is stored at {pp + 3n, 3n + 1}.
273 r4 is stored at {pp + 7n, 3n + 1}.
274 r2 is stored at {pp +11n, 3n + 1}.
275 r0 is stored at {pp +15n, spt}.
277 The other values are 3n+1 limbs each (with most significant limbs small).
279 Negative intermediate results are stored two-complemented.
280 Inputs are destroyed.
283 void
284 mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
285 mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
287 mp_limb_t cy;
288 mp_size_t n3;
289 mp_size_t n3p1;
290 n3 = 3 * n;
291 n3p1 = n3 + 1;
293 #define r6 (pp + n3) /* 3n+1 */
294 #define r4 (pp + 7 * n) /* 3n+1 */
295 #define r2 (pp +11 * n) /* 3n+1 */
296 #define r0 (pp +15 * n) /* s+t <= 2*n */
298 ASSERT( spt <= 2 * n );
299 /******************************* interpolation *****************************/
300 if( half != 0) {
301 cy = mpn_sub_n (r4, r4, r0, spt);
302 MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
304 cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
305 MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
306 DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
308 cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
309 MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
310 DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
312 cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
313 #if BIT_CORRECTION
314 cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
315 n3p1 - spt - BIT_CORRECTION, cy);
316 ASSERT (BIT_CORRECTION > 0 || cy == 0);
317 /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
318 cy = r7[n3p1];
319 r7[n3p1] = 0x80;
320 #else
321 MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
322 #endif
323 DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
324 #if BIT_CORRECTION
325 /* FIXME: assumes r7[n3p1] is writable. */
326 ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
327 r7[n3p1] = cy;
328 #endif
331 r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
332 DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
334 #if HAVE_NATIVE_mpn_add_n_sub_n
335 mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
336 #else
337 mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
338 ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
339 MP_PTR_SWAP(r5, wsi);
340 #endif
342 r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
343 DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
345 #if HAVE_NATIVE_mpn_add_n_sub_n
346 mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
347 #else
348 ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
349 mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
350 MP_PTR_SWAP(r3, wsi);
351 #endif
353 cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
354 #if BIT_CORRECTION
355 MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
356 cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
357 cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
358 ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
359 #else
360 r7[n3] -= cy;
361 DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
362 #endif
364 #if HAVE_NATIVE_mpn_add_n_sub_n
365 mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
366 #else
367 mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
368 mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
369 MP_PTR_SWAP(r7, wsi);
370 #endif
372 r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
374 #if AORSMUL_FASTER_2AORSLSH
375 mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
376 #else
377 DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
378 DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
379 #endif
381 mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
382 #if AORSMUL_FASTER_3AORSLSH
383 mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
384 #else
385 DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
386 DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
387 DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
388 #endif
389 mpn_divexact_by255x188513325(r7, r7, n3p1);
391 mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
392 /* A division by 2835x64 follows. Warning: the operand can be negative! */
393 mpn_divexact_by2835x64(r5, r5, n3p1);
394 if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
395 r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
397 #if AORSMUL_FASTER_AORS_AORSLSH
398 mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
399 #else
400 mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
401 DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
402 #endif
403 #if AORSMUL_FASTER_2AORSLSH
404 mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
405 #else
406 DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
407 DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
408 #endif
409 /* A division by 255x4 follows. Warning: the operand can be negative! */
410 mpn_divexact_by255x4(r6, r6, n3p1);
411 if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
412 r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
414 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
416 ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
417 ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
419 /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
420 DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
421 mpn_submul_1 (r1, r2, n3p1, 1428);
422 mpn_submul_1 (r1, r3, n3p1, 112896);
423 mpn_divexact_by255x182712915(r1, r1, n3p1);
425 ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
426 mpn_divexact_by42525x16(r2, r2, n3p1);
428 #if AORSMUL_FASTER_AORS_2AORSLSH
429 ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
430 #else
431 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
432 ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
433 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
434 #endif
435 ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
436 mpn_divexact_by9x16(r3, r3, n3p1);
438 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
439 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
440 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
442 mpn_add_n (r6, r2, r6, n3p1);
443 ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
444 ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
446 mpn_sub_n (r5, r3, r5, n3p1);
447 ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
448 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
450 mpn_add_n (r7, r1, r7, n3p1);
451 ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
452 ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
454 /* last interpolation steps... */
455 /* ... could be mixed with recomposition
456 ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5|
459 /***************************** recomposition *******************************/
461 pp[] prior to operations:
462 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
464 summation scheme for remaining operations:
465 |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
466 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
467 ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7|
470 cy = mpn_add_n (pp + n, pp + n, r7, n);
471 cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
472 #if HAVE_NATIVE_mpn_add_nc
473 cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
474 #else
475 MPN_INCR_U (r7 + 2 * n, n + 1, cy);
476 cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
477 #endif
478 MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
480 pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
481 cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
482 #if HAVE_NATIVE_mpn_add_nc
483 cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
484 #else
485 MPN_INCR_U (r5 + 2 * n, n + 1, cy);
486 cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
487 #endif
488 MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
490 pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
491 cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
492 #if HAVE_NATIVE_mpn_add_nc
493 cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
494 #else
495 MPN_INCR_U (r3 + 2 * n, n + 1, cy);
496 cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
497 #endif
498 MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
500 pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
501 if ( half ) {
502 cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
503 #if HAVE_NATIVE_mpn_add_nc
504 if(LIKELY(spt > n)) {
505 cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
506 MPN_INCR_U (pp + 16 * n, spt - n, cy);
507 } else {
508 ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
510 #else
511 MPN_INCR_U (r1 + 2 * n, n + 1, cy);
512 if(LIKELY(spt > n)) {
513 cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
514 MPN_INCR_U (pp + 16 * n, spt - n, cy);
515 } else {
516 ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
518 #endif
519 } else {
520 ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
523 #undef r0
524 #undef r2
525 #undef r4
526 #undef r6