beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / generic / toom_interpolate_5pts.c
blob9fa5f0b7a6044c67f7f5eba0ee6ae32f4aeac283
1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
3 Contributed to the GNU project by Robert Harley.
4 Improvements by Paul Zimmermann and Marco Bodrato.
6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
27 or both in parallel, as here.
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
38 #include "gmp.h"
39 #include "gmp-impl.h"
41 void
42 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
43 mp_size_t k, mp_size_t twor, int sa,
44 mp_limb_t vinf0)
46 mp_limb_t cy, saved;
47 mp_size_t twok;
48 mp_size_t kk1;
49 mp_ptr c1, v1, c3, vinf;
51 twok = k + k;
52 kk1 = twok + 1;
54 c1 = c + k;
55 v1 = c1 + k;
56 c3 = v1 + k;
57 vinf = c3 + k;
59 #define v0 (c)
60 /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) =
61 thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0)
63 if (sa)
64 ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
65 else
66 ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
68 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
69 v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */
71 ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */
72 /* (5 3 1 1 0)*/
74 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
75 v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */
77 /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
78 tm1 >= 0 (0 1 0 1 0)
79 No carry comes out from {v1, kk1} +/- {vm1, kk1},
80 and the division by two is exact.
81 If (sa!=0) the sign of vm1 is negative */
82 if (sa)
84 #ifdef HAVE_NATIVE_mpn_rsh1add_n
85 mpn_rsh1add_n (vm1, v1, vm1, kk1);
86 #else
87 ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
88 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
89 #endif
91 else
93 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
94 mpn_rsh1sub_n (vm1, v1, vm1, kk1);
95 #else
96 ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
97 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
98 #endif
101 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
102 v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
104 /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
105 t1 >= 0
107 vinf[0] -= mpn_sub_n (v1, v1, c, twok);
109 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
110 v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
112 /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
113 t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
115 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
116 mpn_rsh1sub_n (v2, v2, v1, kk1);
117 #else
118 ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
119 ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
120 #endif
122 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
123 v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */
125 /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
126 result is v1 >= 0
128 ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
130 /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
131 cy = mpn_add_n (c1, c1, vm1, kk1);
132 MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
133 /* Memory allocated for vm1 is now free, it can be recycled ...*/
135 /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
136 result is v2 >= 0 */
137 saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
138 vinf[0] = vinf0; /* Set the right value for vinf0 */
139 #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
140 cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
141 #else
142 /* Overwrite unused vm1 */
143 cy = mpn_lshift (vm1, vinf, twor, 1);
144 cy += mpn_sub_n (v2, v2, vm1, twor);
145 #endif
146 MPN_DECR_U (v2 + twor, kk1 - twor, cy);
148 /* Current matrix is
149 [1 0 0 0 0; vinf
150 0 1 0 0 0; v2
151 1 0 1 0 0; v1
152 0 1 0 1 0; vm1
153 0 0 0 0 1] v0
154 Some values already are in-place (we added vm1 in the correct position)
155 | vinf| v1 | v0 |
156 | vm1 |
157 One still is in a separated area
158 | +v2 |
159 We have to compute v1-=vinf; vm1 -= v2,
160 |-vinf|
161 | -v2 |
162 Carefully reordering operations we can avoid to compute twice the sum
163 of the high half of v2 plus the low half of vinf.
166 /* Add the high half of t2 in {vinf} */
167 if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */
168 cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
169 MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
170 } else { /* triggered only by very unbalanced cases like
171 (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
172 ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
174 /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
175 result is >= 0 */
176 /* Side effect: we also subtracted (high half) vm1 -= v2 */
177 cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */
178 vinf0 = vinf[0]; /* Save again the right value for vinf0 */
179 vinf[0] = saved;
180 MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */
182 /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
183 Operate only on the low half.
185 cy = mpn_sub_n (c1, c1, v2, k);
186 MPN_DECR_U (v1, kk1, cy);
188 /********************* Beginning the final phase **********************/
190 /* Most of the recomposition was done */
192 /* add t2 in {c+3k, ...}, but only the low half */
193 cy = mpn_add_n (c3, c3, v2, k);
194 vinf[0] += cy;
195 ASSERT(vinf[0] >= cy); /* No carry */
196 MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
198 #undef v0