beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / generic / mulmid.c
blob6b4ea3253dc3a46504b407fe65fcbe94aa65942a
1 /* mpn_mulmid -- middle product
3 Contributed by David Harvey.
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2011 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
38 #include "gmp.h"
39 #include "gmp-impl.h"
42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
45 void
46 mpn_mulmid (mp_ptr rp,
47 mp_srcptr ap, mp_size_t an,
48 mp_srcptr bp, mp_size_t bn)
50 mp_size_t rn, k;
51 mp_ptr scratch, temp;
53 ASSERT (an >= bn);
54 ASSERT (bn >= 1);
55 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
56 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
58 if (bn < MULMID_TOOM42_THRESHOLD)
60 /* region not tall enough to make toom42 worthwhile for any portion */
62 if (an < CHUNK)
64 /* region not too wide either, just call basecase directly */
65 mpn_mulmid_basecase (rp, ap, an, bp, bn);
66 return;
69 /* Region quite wide. For better locality, use basecase on chunks:
71 AAABBBCC..
72 .AAABBBCC.
73 ..AAABBBCC
76 k = CHUNK - bn + 1; /* number of diagonals per chunk */
78 /* first chunk (marked A in the above diagram) */
79 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
81 /* remaining chunks (B, C, etc) */
82 an -= k;
84 while (an >= CHUNK)
86 mp_limb_t t0, t1, cy;
87 ap += k, rp += k;
88 t0 = rp[0], t1 = rp[1];
89 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
90 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
91 MPN_INCR_U (rp + 1, k + 1, t1 + cy);
92 an -= k;
95 if (an >= bn)
97 /* last remaining chunk */
98 mp_limb_t t0, t1, cy;
99 ap += k, rp += k;
100 t0 = rp[0], t1 = rp[1];
101 mpn_mulmid_basecase (rp, ap, an, bp, bn);
102 ADDC_LIMB (cy, rp[0], rp[0], t0);
103 MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
106 return;
109 /* region is tall enough for toom42 */
111 rn = an - bn + 1;
113 if (rn < MULMID_TOOM42_THRESHOLD)
115 /* region not wide enough to make toom42 worthwhile for any portion */
117 TMP_DECL;
119 if (bn < CHUNK)
121 /* region not too tall either, just call basecase directly */
122 mpn_mulmid_basecase (rp, ap, an, bp, bn);
123 return;
126 /* Region quite tall. For better locality, use basecase on chunks:
128 AAAAA....
129 .AAAAA...
130 ..BBBBB..
131 ...BBBBB.
132 ....CCCCC
135 TMP_MARK;
137 temp = TMP_ALLOC_LIMBS (rn + 2);
139 /* first chunk (marked A in the above diagram) */
140 bp += bn - CHUNK, an -= bn - CHUNK;
141 mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
143 /* remaining chunks (B, C, etc) */
144 bn -= CHUNK;
146 while (bn >= CHUNK)
148 ap += CHUNK, bp -= CHUNK;
149 mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
150 mpn_add_n (rp, rp, temp, rn + 2);
151 bn -= CHUNK;
154 if (bn)
156 /* last remaining chunk */
157 ap += CHUNK, bp -= bn;
158 mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
159 mpn_add_n (rp, rp, temp, rn + 2);
162 TMP_FREE;
163 return;
166 /* we're definitely going to use toom42 somewhere */
168 if (bn > rn)
170 /* slice region into chunks, use toom42 on all chunks except possibly
171 the last:
173 AA....
174 .AA...
175 ..BB..
176 ...BB.
177 ....CC
180 TMP_DECL;
181 TMP_MARK;
183 temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
184 scratch = temp + rn + 2;
186 /* first chunk (marked A in the above diagram) */
187 bp += bn - rn;
188 mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
190 /* remaining chunks (B, C, etc) */
191 bn -= rn;
193 while (bn >= rn)
195 ap += rn, bp -= rn;
196 mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
197 mpn_add_n (rp, rp, temp, rn + 2);
198 bn -= rn;
201 if (bn)
203 /* last remaining chunk */
204 ap += rn, bp -= bn;
205 mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
206 mpn_add_n (rp, rp, temp, rn + 2);
209 TMP_FREE;
211 else
213 /* slice region into chunks, use toom42 on all chunks except possibly
214 the last:
216 AAABBBCC..
217 .AAABBBCC.
218 ..AAABBBCC
221 TMP_DECL;
222 TMP_MARK;
224 scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
226 /* first chunk (marked A in the above diagram) */
227 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
229 /* remaining chunks (B, C, etc) */
230 rn -= bn;
232 while (rn >= bn)
234 mp_limb_t t0, t1, cy;
235 ap += bn, rp += bn;
236 t0 = rp[0], t1 = rp[1];
237 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
238 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
239 MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
240 rn -= bn;
243 TMP_FREE;
245 if (rn)
247 /* last remaining chunk */
248 mp_limb_t t0, t1, cy;
249 ap += bn, rp += bn;
250 t0 = rp[0], t1 = rp[1];
251 mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
252 ADDC_LIMB (cy, rp[0], rp[0], t0);
253 MPN_INCR_U (rp + 1, rn + 1, t1 + cy);