/* Source viewer metadata (luatex.git vendored copy):
   beta-0.89.2
   [luatex.git] / source / libs / gmp / gmp-src / mpn / generic / toom_interpolate_16pts.c
   blob 5afe6641f67365ebd076cd9fd1bf57ed73ae72e5 */
/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
38 #include "gmp.h"
39 #include "gmp-impl.h"
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
43 #endif
45 #if GMP_NUMB_BITS < 28
46 #error Not implemented: divexact_by188513325 and _by182712915 will not work.
47 #endif
50 #if HAVE_NATIVE_mpn_sublsh_n
51 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
52 #else
53 static mp_limb_t
54 DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
56 #if USE_MUL_1 && 0
57 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
58 #else
59 mp_limb_t __cy;
60 __cy = mpn_lshift(ws,src,n,s);
61 return __cy + mpn_sub_n(dst,dst,ws,n);
62 #endif
64 #endif
66 #if HAVE_NATIVE_mpn_addlsh_n
67 #define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
68 #else
69 static mp_limb_t
70 DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
72 #if USE_MUL_1 && 0
73 return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
74 #else
75 mp_limb_t __cy;
76 __cy = mpn_lshift(ws,src,n,s);
77 return __cy + mpn_add_n(dst,dst,ws,n);
78 #endif
80 #endif
#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
#else
/* {dst,nd} -= {src,ns} >> s.  Fallback built from sublsh of the upper part
   plus a separate decrement of the low fractional limb.
   FIXME: This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
do {									\
  mp_limb_t __cy;							\
  MPN_DECR_U (dst, nd, src[0] >> s);					\
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
} while (0)
#endif
/* FIXME: tuneup should decide the best variant */
#ifndef AORSMUL_FASTER_AORS_AORSLSH
#define AORSMUL_FASTER_AORS_AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_AORS_2AORSLSH
#define AORSMUL_FASTER_AORS_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_2AORSLSH
#define AORSMUL_FASTER_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_3AORSLSH
#define AORSMUL_FASTER_3AORSLSH 1
#endif

/* With limbs narrower than 43 bits the 42-bit shifts below must be split:
   shift one whole limb by pointer offset, then by the remaining bits.  */
#if GMP_NUMB_BITS < 43
#define BIT_CORRECTION 1
#define CORRECTION_BITS GMP_NUMB_BITS
#else
#define BIT_CORRECTION 0
#define CORRECTION_BITS 0
#endif
/* Modular (binary) inverses of the odd divisors, used by the exact-division
   (bdiv) paths below.  */
#define BINVERT_9 \
  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)

#define BINVERT_255 \
  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))

/* FIXME: find some more general expressions for inverses */
#if GMP_LIMB_BITS == 32
#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
#if GMP_NAIL_BITS == 0
#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
#else /* GMP_NAIL_BITS != 0 */
#define BINVERT_255x182712915H \
  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
#define BINVERT_255x188513325H \
  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
#endif
#else
#if GMP_LIMB_BITS == 64
#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
#define BINVERT_255x182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
#define BINVERT_255x188513325 (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
#endif
#endif
/* Exact divisions by the small fixed constants of the interpolation.
   Each picks the fastest available primitive: bdiv_dbm1 (divisor of the
   form B-1 over a digit), pi1_bdiv_q_1 (precomputed inverse, shifted
   divisor), or plain divexact_1.  */
#ifndef mpn_divexact_by255
#if GMP_NUMB_BITS % 8 == 0
#define mpn_divexact_by255(dst,src,size) \
  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
#else
#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
#endif
#endif
#endif

#ifndef mpn_divexact_by255x4
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
#else
#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
#endif
#endif

#ifndef mpn_divexact_by9x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
#else
#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
#endif
#endif

#ifndef mpn_divexact_by42525x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
#else
#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
#endif
#endif

#ifndef mpn_divexact_by2835x64
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
#else
#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
#endif
#endif
/* Exact divisions by 255*182712915 and 255*188513325.  When the product
   does not fit a limb (GMP_NUMB_BITS < 36), divide by the two factors in
   sequence; otherwise use the combined divisor and its precomputed
   binary inverse.  */
#ifndef mpn_divexact_by255x182712915
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
/* FIXME: use mpn_bdiv_q_2_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
#define mpn_divexact_by255x182712915(dst,src,size)				\
  do {										\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);	\
    mpn_divexact_by255(dst,dst,size);						\
  } while(0)
#else
#define mpn_divexact_by255x182712915(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
#define mpn_divexact_by255x182712915(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
#else
#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

#ifndef mpn_divexact_by255x188513325
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
/* FIXME: use mpn_bdiv_q_1_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
#define mpn_divexact_by255x188513325(dst,src,size)				\
  do {										\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);	\
    mpn_divexact_by255(dst,dst,size);						\
  } while(0)
#else
#define mpn_divexact_by255x188513325(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
#define mpn_divexact_by255x188513325(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
#else
#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif
251 /* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
252 points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
253 +-1/8, 0. More precisely, we want to compute
254 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
255 14), given the 16 (rsp. 15) values:
257 r0 = limit at infinity of f(x) / x^7,
258 r1 = f(8),f(-8),
259 r2 = f(4),f(-4),
260 r3 = f(2),f(-2),
261 r4 = f(1),f(-1),
262 r5 = f(1/4),f(-1/4),
263 r6 = f(1/2),f(-1/2),
264 r7 = f(1/8),f(-1/8),
265 r8 = f(0).
267 All couples of the form f(n),f(-n) must be already mixed with
268 toom_couple_handling(f(n),...,f(-n),...)
270 The result is stored in {pp, spt + 7*n (or 8*n)}.
271 At entry, r8 is stored at {pp, 2n},
272 r6 is stored at {pp + 3n, 3n + 1}.
273 r4 is stored at {pp + 7n, 3n + 1}.
274 r2 is stored at {pp +11n, 3n + 1}.
275 r0 is stored at {pp +15n, spt}.
277 The other values are 3n+1 limbs each (with most significant limbs small).
279 Negative intermediate results are stored two-complemented.
280 Inputs are destroyed.
283 void
284 mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
285 mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
287 mp_limb_t cy;
288 mp_size_t n3;
289 mp_size_t n3p1;
290 n3 = 3 * n;
291 n3p1 = n3 + 1;
293 #define r6 (pp + n3) /* 3n+1 */
294 #define r4 (pp + 7 * n) /* 3n+1 */
295 #define r2 (pp +11 * n) /* 3n+1 */
296 #define r0 (pp +15 * n) /* s+t <= 2*n */
298 ASSERT( spt <= 2 * n );
299 /******************************* interpolation *****************************/
300 if( half != 0) {
301 cy = mpn_sub_n (r4, r4, r0, spt);
302 MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
304 cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
305 MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
306 DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
308 cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
309 MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
310 DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
312 cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
313 #if BIT_CORRECTION
314 cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
315 n3p1 - spt - BIT_CORRECTION, cy);
316 ASSERT (BIT_CORRECTION > 0 || cy == 0);
317 /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
318 cy = r7[n3p1];
319 r7[n3p1] = 0x80;
320 #else
321 MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
322 #endif
323 DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
324 #if BIT_CORRECTION
325 /* FIXME: assumes r7[n3p1] is writable. */
326 ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
327 r7[n3p1] = cy;
328 #endif
331 r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
332 DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
334 #if HAVE_NATIVE_mpn_add_n_sub_n
335 mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
336 #else
337 mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
338 ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
339 MP_PTR_SWAP(r5, wsi);
340 #endif
342 r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
343 DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
345 #if HAVE_NATIVE_mpn_add_n_sub_n
346 mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
347 #else
348 ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
349 mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
350 MP_PTR_SWAP(r3, wsi);
351 #endif
353 cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
354 #if BIT_CORRECTION
355 MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
356 cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
357 cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
358 ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
359 #else
360 r7[n3] -= cy;
361 DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
362 #endif
364 #if HAVE_NATIVE_mpn_add_n_sub_n
365 mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
366 #else
367 mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
368 mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
369 MP_PTR_SWAP(r7, wsi);
370 #endif
372 r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
374 #if AORSMUL_FASTER_2AORSLSH
375 mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
376 #else
377 DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
378 DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
379 #endif
381 mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
382 #if AORSMUL_FASTER_3AORSLSH
383 mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
384 #else
385 DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
386 DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
387 DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
388 #endif
389 mpn_divexact_by255x188513325(r7, r7, n3p1);
391 mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
392 /* A division by 2835x64 follows. Warning: the operand can be negative! */
393 mpn_divexact_by2835x64(r5, r5, n3p1);
394 if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
395 r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
397 #if AORSMUL_FASTER_AORS_AORSLSH
398 mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
399 #else
400 mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
401 DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
402 #endif
403 #if AORSMUL_FASTER_2AORSLSH
404 mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
405 #else
406 DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
407 DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
408 #endif
409 /* A division by 255x4 follows. Warning: the operand can be negative! */
410 mpn_divexact_by255x4(r6, r6, n3p1);
411 if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
412 r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
414 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
416 ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
417 ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
419 /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
420 DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
421 mpn_submul_1 (r1, r2, n3p1, 1428);
422 mpn_submul_1 (r1, r3, n3p1, 112896);
423 mpn_divexact_by255x182712915(r1, r1, n3p1);
425 ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
426 mpn_divexact_by42525x16(r2, r2, n3p1);
428 #if AORSMUL_FASTER_AORS_2AORSLSH
429 ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
430 #else
431 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
432 ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
433 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
434 #endif
435 ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
436 mpn_divexact_by9x16(r3, r3, n3p1);
438 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
439 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
440 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
442 mpn_add_n (r6, r2, r6, n3p1);
443 ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
444 ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
446 mpn_sub_n (r5, r3, r5, n3p1);
447 ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
448 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
450 mpn_add_n (r7, r1, r7, n3p1);
451 ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
452 ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
454 /* last interpolation steps... */
455 /* ... could be mixed with recomposition
456 ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5|
459 /***************************** recomposition *******************************/
461 pp[] prior to operations:
462 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
464 summation scheme for remaining operations:
465 |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
466 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
467 ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7|
470 cy = mpn_add_n (pp + n, pp + n, r7, n);
471 cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
472 #if HAVE_NATIVE_mpn_add_nc
473 cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
474 #else
475 MPN_INCR_U (r7 + 2 * n, n + 1, cy);
476 cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
477 #endif
478 MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
480 pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
481 cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
482 #if HAVE_NATIVE_mpn_add_nc
483 cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
484 #else
485 MPN_INCR_U (r5 + 2 * n, n + 1, cy);
486 cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
487 #endif
488 MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
490 pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
491 cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
492 #if HAVE_NATIVE_mpn_add_nc
493 cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
494 #else
495 MPN_INCR_U (r3 + 2 * n, n + 1, cy);
496 cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
497 #endif
498 MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
500 pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
501 if ( half ) {
502 cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
503 #if HAVE_NATIVE_mpn_add_nc
504 if(LIKELY(spt > n)) {
505 cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
506 MPN_INCR_U (pp + 16 * n, spt - n, cy);
507 } else {
508 ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
510 #else
511 MPN_INCR_U (r1 + 2 * n, n + 1, cy);
512 if(LIKELY(spt > n)) {
513 cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
514 MPN_INCR_U (pp + 16 * n, spt - n, cy);
515 } else {
516 ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
518 #endif
519 } else {
520 ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
523 #undef r0
524 #undef r2
525 #undef r4
526 #undef r6